diff --git a/Data/RDTests.sqlt b/Data/RDTests.sqlt index dd9d95c8f00..4bbd6abf6b1 100644 Binary files a/Data/RDTests.sqlt and b/Data/RDTests.sqlt differ diff --git a/rdkit/ML/AnalyzeComposite.py b/rdkit/ML/AnalyzeComposite.py deleted file mode 100755 index 643aedbe5f4..00000000000 --- a/rdkit/ML/AnalyzeComposite.py +++ /dev/null @@ -1,336 +0,0 @@ -# $Id$ -# -# Copyright (C) 2002-2008 greg Landrum and Rational Discovery LLC -# -# @@ All Rights Reserved @@ -# This file is part of the RDKit. -# The contents are covered by the terms of the BSD license -# which is included in the file license.txt, found at the root -# of the RDKit source tree. -# -""" command line utility to report on the contributions of descriptors to -tree-based composite models - -Usage: AnalyzeComposite [optional args] - - : file name(s) of pickled composite model(s) - (this is the name of the db table if using a database) - - Optional Arguments: - - -n number: the number of levels of each model to consider - - -d dbname: the database from which to read the models - - -N Note: the note string to search for to pull models from the database - - -v: be verbose whilst screening -""" -from warnings import warn - -warn('This module is deprecated and will be removed in the 2024.03 release', DeprecationWarning, - stacklevel=2) - -import pickle -import sys - -import numpy - -from rdkit.Dbase.DbConnection import DbConnect -from rdkit.ML import ScreenComposite -from rdkit.ML.Data import Stats -from rdkit.ML.DecTree import Tree, TreeUtils - -__VERSION_STRING = "2.2.0" - - -def ProcessIt(composites, nToConsider=3, verbose=0): - composite = composites[0] - nComposites = len(composites) - ns = composite.GetDescriptorNames() - # nDesc = len(ns)-2 - if len(ns) > 2: - globalRes = {} - - nDone = 1 - descNames = {} - for composite in composites: - if verbose > 0: - print('#------------------------------------') - print('Doing: ', nDone) - nModels = len(composite) - nDone += 1 - res = {} - for i in range(len(composite)): - model = composite.GetModel(i) - if isinstance(model, Tree.TreeNode): - levels = TreeUtils.CollectLabelLevels(model, {}, 0, nToConsider) - TreeUtils.CollectDescriptorNames(model, descNames, 0, nToConsider) - for descId in levels.keys(): - v = res.get(descId, numpy.zeros(nToConsider, float)) - v[levels[descId]] += 1. / nModels - res[descId] = v - for k in res: - v = globalRes.get(k, numpy.zeros(nToConsider, float)) - v += res[k] / nComposites - globalRes[k] = v - if verbose > 0: - for k in res.keys(): - name = descNames[k] - strRes = ', '.join(['%4.2f' % x for x in res[k]]) - print('%s,%s,%5.4f' % (name, strRes, sum(res[k]))) - - print() - - if verbose >= 0: - print('# Average Descriptor Positions') - retVal = [] - for k in globalRes: - name = descNames[k] - if verbose >= 0: - strRes = ', '.join(['%4.2f' % x for x in globalRes[k]]) - print('%s,%s,%5.4f' % (name, strRes, sum(globalRes[k]))) - tmp = [name] - tmp.extend(globalRes[k]) - tmp.append(sum(globalRes[k])) - retVal.append(tmp) - if verbose >= 0: - print() - else: - retVal = [] - return retVal - - -def ErrorStats(conn, where, enrich=1): - fields = ('overall_error,holdout_error,overall_result_matrix,' + - 'holdout_result_matrix,overall_correct_conf,overall_incorrect_conf,' + - 'holdout_correct_conf,holdout_incorrect_conf') - try: - data = conn.GetData(fields=fields, where=where) - except Exception: - import traceback - traceback.print_exc() - return None - nPts = len(data) - if not nPts: - sys.stderr.write('no runs found\n') - return None - overall = numpy.zeros(nPts, float) - overallEnrich = numpy.zeros(nPts, float) - oCorConf = 0.0 - oInCorConf = 0.0 - holdout = numpy.zeros(nPts, float) - holdoutEnrich = numpy.zeros(nPts, float) - hCorConf = 0.0 - hInCorConf = 0.0 - overallMatrix = None - holdoutMatrix = None - for i in range(nPts): - if data[i][0] is not None: - overall[i] = data[i][0] - oCorConf += data[i][4] - oInCorConf += data[i][5] - if data[i][1] is not None: - holdout[i] = data[i][1] - haveHoldout = 1 - else: - haveHoldout = 0 - tmpOverall = 1. * eval(data[i][2]) - if enrich >= 0: - overallEnrich[i] = ScreenComposite.CalcEnrichment(tmpOverall, tgt=enrich) - if haveHoldout: - tmpHoldout = 1. * eval(data[i][3]) - if enrich >= 0: - holdoutEnrich[i] = ScreenComposite.CalcEnrichment(tmpHoldout, tgt=enrich) - if overallMatrix is None: - if data[i][2] is not None: - overallMatrix = tmpOverall - if haveHoldout and data[i][3] is not None: - holdoutMatrix = tmpHoldout - else: - overallMatrix += tmpOverall - if haveHoldout: - holdoutMatrix += tmpHoldout - if haveHoldout: - hCorConf += data[i][6] - hInCorConf += data[i][7] - - avgOverall = sum(overall) / nPts - oCorConf /= nPts - oInCorConf /= nPts - overallMatrix /= nPts - oSort = numpy.argsort(overall) - oMin = overall[oSort[0]] - overall -= avgOverall - devOverall = numpy.sqrt(sum(overall**2) / (nPts - 1)) - res = {} - res['oAvg'] = 100 * avgOverall - res['oDev'] = 100 * devOverall - res['oCorrectConf'] = 100 * oCorConf - res['oIncorrectConf'] = 100 * oInCorConf - res['oResultMat'] = overallMatrix - res['oBestIdx'] = oSort[0] - res['oBestErr'] = 100 * oMin - - if enrich >= 0: - mean, dev = Stats.MeanAndDev(overallEnrich) - res['oAvgEnrich'] = mean - res['oDevEnrich'] = dev - - if haveHoldout: - avgHoldout = sum(holdout) / nPts - hCorConf /= nPts - hInCorConf /= nPts - holdoutMatrix /= nPts - hSort = numpy.argsort(holdout) - hMin = holdout[hSort[0]] - holdout -= avgHoldout - devHoldout = numpy.sqrt(sum(holdout**2) / (nPts - 1)) - res['hAvg'] = 100 * avgHoldout - res['hDev'] = 100 * devHoldout - res['hCorrectConf'] = 100 * hCorConf - res['hIncorrectConf'] = 100 * hInCorConf - res['hResultMat'] = holdoutMatrix - res['hBestIdx'] = hSort[0] - res['hBestErr'] = 100 * hMin - if enrich >= 0: - mean, dev = Stats.MeanAndDev(holdoutEnrich) - res['hAvgEnrich'] = mean - res['hDevEnrich'] = dev - return res - - -def ShowStats(statD, enrich=1): - statD = statD.copy() - statD['oBestIdx'] = statD['oBestIdx'] + 1 - txt = """ -# Error Statistics: -\tOverall: %(oAvg)6.3f%% (%(oDev)6.3f) %(oCorrectConf)4.1f/%(oIncorrectConf)4.1f -\t\tBest: %(oBestIdx)d %(oBestErr)6.3f%%""" % (statD) - if 'hAvg' in statD: - statD['hBestIdx'] = statD['hBestIdx'] + 1 - txt += """ -\tHoldout: %(hAvg)6.3f%% (%(hDev)6.3f) %(hCorrectConf)4.1f/%(hIncorrectConf)4.1f -\t\tBest: %(hBestIdx)d %(hBestErr)6.3f%% - """ % (statD) - print(txt) - print() - print('# Results matrices:') - print('\tOverall:') - tmp = numpy.transpose(statD['oResultMat']) - colCounts = sum(tmp) - rowCounts = sum(tmp, 1) - for i in range(len(tmp)): - if rowCounts[i] == 0: - rowCounts[i] = 1 - row = tmp[i] - print('\t\t', end='') - for j in range(len(row)): - print('% 6.2f' % row[j], end='') - print('\t| % 4.2f' % (100. * tmp[i, i] / rowCounts[i])) - print('\t\t', end='') - for i in range(len(tmp)): - print('------', end='') - print() - print('\t\t', end='') - for i in range(len(tmp)): - if colCounts[i] == 0: - colCounts[i] = 1 - print('% 6.2f' % (100. * tmp[i, i] / colCounts[i]), end='') - print() - if enrich > -1 and 'oAvgEnrich' in statD: - print('\t\tEnrich(%d): %.3f (%.3f)' % (enrich, statD['oAvgEnrich'], statD['oDevEnrich'])) - - if 'hResultMat' in statD: - print('\tHoldout:') - tmp = numpy.transpose(statD['hResultMat']) - colCounts = sum(tmp) - rowCounts = sum(tmp, 1) - for i in range(len(tmp)): - if rowCounts[i] == 0: - rowCounts[i] = 1 - row = tmp[i] - print('\t\t', end='') - for j in range(len(row)): - print('% 6.2f' % row[j], end='') - print('\t| % 4.2f' % (100. * tmp[i, i] / rowCounts[i])) - print('\t\t', end='') - for i in range(len(tmp)): - print('------', end='') - print() - print('\t\t', end='') - for i in range(len(tmp)): - if colCounts[i] == 0: - colCounts[i] = 1 - print('% 6.2f' % (100. * tmp[i, i] / colCounts[i]), end='') - print() - if enrich > -1 and 'hAvgEnrich' in statD: - print('\t\tEnrich(%d): %.3f (%.3f)' % (enrich, statD['hAvgEnrich'], statD['hDevEnrich'])) - - return - - -def Usage(): - print(__doc__) - sys.exit(-1) - - -if __name__ == "__main__": - import getopt - try: - args, extras = getopt.getopt(sys.argv[1:], 'n:d:N:vX', ( - 'skip', - 'enrich=', - )) - except Exception: - Usage() - - count = 3 - db = None - note = '' - verbose = 0 - skip = 0 - enrich = 1 - for arg, val in args: - if arg == '-n': - count = int(val) + 1 - elif arg == '-d': - db = val - elif arg == '-N': - note = val - elif arg == '-v': - verbose = 1 - elif arg == '--skip': - skip = 1 - elif arg == '--enrich': - enrich = int(val) - composites = [] - if db is None: - for arg in extras: - composite = pickle.load(open(arg, 'rb')) - composites.append(composite) - else: - tbl = extras[0] - conn = DbConnect(db, tbl) - if note: - where = "where note='%s'" % (note) - else: - where = '' - if not skip: - pkls = conn.GetData(fields='model', where=where) - composites = [] - for pkl in pkls: - pkl = str(pkl[0]) - comp = pickle.loads(pkl) - composites.append(comp) - - if len(composites): - ProcessIt(composites, count, verbose=verbose) - elif not skip: - print('ERROR: no composite models found') - sys.exit(-1) - - if db: - res = ErrorStats(conn, where, enrich=enrich) - if res: - ShowStats(res) diff --git a/rdkit/ML/BuildComposite.py b/rdkit/ML/BuildComposite.py deleted file mode 100755 index 3956fb1bfac..00000000000 --- a/rdkit/ML/BuildComposite.py +++ /dev/null @@ -1,1034 +0,0 @@ -# $Id$ -# -# Copyright (C) 2000-2008 greg Landrum and Rational Discovery LLC -# -# @@ All Rights Reserved @@ -# This file is part of the RDKit. -# The contents are covered by the terms of the BSD license -# which is included in the file license.txt, found at the root -# of the RDKit source tree. -# -""" command line utility for building composite models - -#DOC - -**Usage** - - BuildComposite [optional args] filename - -Unless indicated otherwise (via command line arguments), _filename_ is -a QDAT file. - -**Command Line Arguments** - - - -o *filename*: name of the output file for the pickled composite - - - -n *num*: number of separate models to add to the composite - - - -p *tablename*: store persistence data in the database - in table *tablename* - - - -N *note*: attach some arbitrary text to the persistence data - - - -b *filename*: name of the text file to hold examples from the - holdout set which are misclassified - - - -s: split the data into training and hold-out sets before building - the composite - - - -f *frac*: the fraction of data to use in the training set when the - data is split - - - -r: randomize the activities (for testing purposes). This ignores - the initial distribution of activity values and produces each - possible activity value with equal likliehood. - - - -S: shuffle the activities (for testing purposes) This produces - a permutation of the input activity values. - - - -l: locks the random number generator to give consistent sets - of training and hold-out data. This is primarily intended - for testing purposes. - - - -B: use a so-called Bayesian composite model. - - - -d *database name*: instead of reading the data from a QDAT file, - pull it from a database. In this case, the _filename_ argument - provides the name of the database table containing the data set. - - - -D: show a detailed breakdown of the composite model performance - across the training and, when appropriate, hold-out sets. - - - -P *pickle file name*: write out the pickled data set to the file - - - -F *filter frac*: filters the data before training to change the - distribution of activity values in the training set. *filter - frac* is the fraction of the training set that should have the - target value. **See note below on data filtering.** - - - -v *filter value*: filters the data before training to change the - distribution of activity values in the training set. *filter - value* is the target value to use in filtering. **See note below - on data filtering.** - - - --modelFiltFrac *model filter frac*: Similar to filter frac above, - in this case the data is filtered for each model in the composite - rather than a single overall filter for a composite. *model - filter frac* is the fraction of the training set for each model - that should have the target value (*model filter value*). - - - --modelFiltVal *model filter value*: target value to use for - filtering data before training each model in the composite. - - - -t *threshold value*: use high-confidence predictions for the - final analysis of the hold-out data. - - - -Q *list string*: the values of quantization bounds for the - activity value. See the _-q_ argument for the format of *list - string*. - - - --nRuns *count*: build *count* composite models - - - --prune: prune any models built - - - -h: print a usage message and exit. - - - -V: print the version number and exit - - *-*-*-*-*-*-*-*- Tree-Related Options -*-*-*-*-*-*-*-* - - - -g: be less greedy when training the models. - - - -G *number*: force trees to be rooted at descriptor *number*. - - - -L *limit*: provide an (integer) limit on individual model - complexity - - - -q *list string*: Add QuantTrees to the composite and use the list - specified in *list string* as the number of target quantization - bounds for each descriptor. Don't forget to include 0's at the - beginning and end of *list string* for the name and value fields. - For example, if there are 4 descriptors and you want 2 quant - bounds apiece, you would use _-q "[0,2,2,2,2,0]"_. - Two special cases: - 1) If you would like to ignore a descriptor in the model - building, use '-1' for its number of quant bounds. - 2) If you have integer valued data that should not be quantized - further, enter 0 for that descriptor. - - - --recycle: allow descriptors to be used more than once in a tree - - - --randomDescriptors=val: toggles growing random forests with val - randomly-selected descriptors available at each node. - - - *-*-*-*-*-*-*-*- KNN-Related Options -*-*-*-*-*-*-*-* - - - --doKnn: use K-Nearest Neighbors models - - - --knnK=*value*: the value of K to use in the KNN models - - - --knnTanimoto: use the Tanimoto metric in KNN models - - - --knnEuclid: use a Euclidean metric in KNN models - - *-*-*-*-*-*-*- Naive Bayes Classifier Options -*-*-*-*-*-*-*-* - - --doNaiveBayes : use Naive Bayes classifiers - - - --mEstimateVal : the value to be used in the m-estimate formula - If this is greater than 0.0, we use it to compute the conditional - probabilities by the m-estimate - - *-*-*-*-*-*-*-*- SVM-Related Options -*-*-*-*-*-*-*-* - - **** NOTE: THESE ARE DISABLED **** - -# # - --doSVM: use Support-vector machines - -# # - --svmKernel=*kernel*: choose the type of kernel to be used for -# # the SVMs. Options are: -# # The default is: - -# # - --svmType=*type*: choose the type of support-vector machine -# # to be used. Options are: -# # The default is: - -# # - --svmGamma=*gamma*: provide the gamma value for the SVMs. If this -# # is not provided, a grid search will be carried out to determine an -# # optimal *gamma* value for each SVM. - -# # - --svmCost=*cost*: provide the cost value for the SVMs. If this is -# # not provided, a grid search will be carried out to determine an -# # optimal *cost* value for each SVM. - -# # - --svmWeights=*weights*: provide the weight values for the -# # activities. If provided this should be a sequence of (label, -# # weight) 2-tuples *nActs* long. If not provided, a weight of 1 -# # will be used for each activity. - -# # - --svmEps=*epsilon*: provide the epsilon value used to determine -# # when the SVM has converged. Defaults to 0.001 - -# # - --svmDegree=*degree*: provide the degree of the kernel (when -# # sensible) Defaults to 3 - -# # - --svmCoeff=*coeff*: provide the coefficient for the kernel (when -# # sensible) Defaults to 0 - -# # - --svmNu=*nu*: provide the nu value for the kernel (when sensible) -# # Defaults to 0.5 - -# # - --svmDataType=*float*: if the data is contains only 1 and 0 s, specify by -# # using binary. Defaults to float - -# # - --svmCache=*cache*: provide the size of the memory cache (in MB) -# # to be used while building the SVM. Defaults to 40 - -**Notes** - - - *Data filtering*: When there is a large disparity between the - numbers of points with various activity levels present in the - training set it is sometimes desirable to train on a more - homogeneous data set. This can be accomplished using filtering. - The filtering process works by selecting a particular target - fraction and target value. For example, in a case where 95% of - the original training set has activity 0 and ony 5% activity 1, we - could filter (by randomly removing points with activity 0) so that - 30% of the data set used to build the composite has activity 1. - - -""" -from warnings import warn - -warn('This module is deprecated and will be removed in the 2024.03 release', DeprecationWarning, - stacklevel=2) - -import pickle -import sys -import time - -import numpy - -from rdkit import DataStructs -from rdkit.Dbase import DbModule -from rdkit.ML import CompositeRun, ScreenComposite -from rdkit.ML.Composite import BayesComposite, Composite -from rdkit.ML.Data import DataUtils, SplitData -from rdkit.utils import listutils - -# # from ML.SVM import SVMClassificationModel as SVM -_runDetails = CompositeRun.CompositeRun() - -__VERSION_STRING = "3.2.3" - -_verbose = 1 - - -def message(msg): - """ emits messages to _sys.stdout_ - override this in modules which import this one to redirect output - - **Arguments** - - - msg: the string to be displayed - - """ - if _verbose: - sys.stdout.write('%s\n' % (msg)) - - -def testall(composite, examples, badExamples=[]): - """ screens a number of examples past a composite - - **Arguments** - - - composite: a composite model - - - examples: a list of examples (with results) to be screened - - - badExamples: a list to which misclassified examples are appended - - **Returns** - - a list of 2-tuples containing: - - 1) a vote - - 2) a confidence - - these are the votes and confidence levels for **misclassified** examples - - """ - wrong = [] - for example in examples: - if composite.GetActivityQuantBounds(): - answer = composite.QuantizeActivity(example)[-1] - else: - answer = example[-1] - res, conf = composite.ClassifyExample(example) - if res != answer: - wrong.append((res, conf)) - badExamples.append(example) - - return wrong - - -def GetCommandLine(details): - """ #DOC - - """ - args = ['BuildComposite'] - args.append('-n %d' % (details.nModels)) - if details.filterFrac != 0.0: - args.append('-F %.3f -v %d' % (details.filterFrac, details.filterVal)) - if details.modelFilterFrac != 0.0: - args.append('--modelFiltFrac=%.3f --modelFiltVal=%d' % - (details.modelFilterFrac, details.modelFilterVal)) - if details.splitRun: - args.append('-s -f %.3f' % (details.splitFrac)) - if details.shuffleActivities: - args.append('-S') - if details.randomActivities: - args.append('-r') - if details.threshold > 0.0: - args.append('-t %.3f' % (details.threshold)) - if details.activityBounds: - args.append('-Q "%s"' % (details.activityBoundsVals)) - if details.dbName: - args.append('-d %s' % (details.dbName)) - if details.detailedRes: - args.append('-D') - if hasattr(details, 'noScreen') and details.noScreen: - args.append('--noScreen') - if details.persistTblName and details.dbName: - args.append('-p %s' % (details.persistTblName)) - if details.note: - args.append('-N %s' % (details.note)) - if details.useTrees: - if details.limitDepth > 0: - args.append('-L %d' % (details.limitDepth)) - if details.lessGreedy: - args.append('-g') - if details.qBounds: - shortBounds = listutils.CompactListRepr(details.qBounds) - if details.qBounds: - args.append('-q "%s"' % (shortBounds)) - else: - if details.qBounds: - args.append('-q "%s"' % (details.qBoundCount)) - - if details.pruneIt: - args.append('--prune') - if details.startAt: - args.append('-G %d' % details.startAt) - if details.recycleVars: - args.append('--recycle') - if details.randomDescriptors: - args.append('--randomDescriptors=%d' % details.randomDescriptors) - if details.useSigTrees: - args.append('--doSigTree') - if details.limitDepth > 0: - args.append('-L %d' % (details.limitDepth)) - if details.randomDescriptors: - args.append('--randomDescriptors=%d' % details.randomDescriptors) - - if details.useKNN: - args.append('--doKnn --knnK %d' % (details.knnNeighs)) - if details.knnDistFunc == 'Tanimoto': - args.append('--knnTanimoto') - else: - args.append('--knnEuclid') - - if details.useNaiveBayes: - args.append('--doNaiveBayes') - if details.mEstimateVal >= 0.0: - args.append('--mEstimateVal=%.3f' % details.mEstimateVal) - - # # if details.useSVM: - # # args.append('--doSVM') - # # if details.svmKernel: - # # for k in SVM.kernels.keys(): - # # if SVM.kernels[k]==details.svmKernel: - # # args.append('--svmKernel=%s'%k) - # # break - # # if details.svmType: - # # for k in SVM.machineTypes.keys(): - # # if SVM.machineTypes[k]==details.svmType: - # # args.append('--svmType=%s'%k) - # # break - # # if details.svmGamma: - # # args.append('--svmGamma=%f'%details.svmGamma) - # # if details.svmCost: - # # args.append('--svmCost=%f'%details.svmCost) - # # if details.svmWeights: - # # args.append("--svmWeights='%s'"%str(details.svmWeights)) - # # if details.svmDegree: - # # args.append('--svmDegree=%d'%details.svmDegree) - # # if details.svmCoeff: - # # args.append('--svmCoeff=%d'%details.svmCoeff) - # # if details.svmEps: - # # args.append('--svmEps=%f'%details.svmEps) - # # if details.svmNu: - # # args.append('--svmNu=%f'%details.svmNu) - # # if details.svmCache: - # # args.append('--svmCache=%d'%details.svmCache) - # # if detail.svmDataType: - # # args.append('--svmDataType=%s'%details.svmDataType) - # # if not details.svmShrink: - # # args.append('--svmShrink') - - if details.replacementSelection: - args.append('--replacementSelection') - - # this should always be last: - if details.tableName: - args.append(details.tableName) - - return ' '.join(args) - - -def RunOnData(details, data, progressCallback=None, saveIt=1, setDescNames=0): - if details.lockRandom: - seed = details.randomSeed - else: - import random - seed = (random.randint(0, 1e6), random.randint(0, 1e6)) - DataUtils.InitRandomNumbers(seed) - testExamples = [] - if details.shuffleActivities == 1: - DataUtils.RandomizeActivities(data, shuffle=1, runDetails=details) - elif details.randomActivities == 1: - DataUtils.RandomizeActivities(data, shuffle=0, runDetails=details) - - namedExamples = data.GetNamedData() - if details.splitRun == 1: - trainIdx, testIdx = SplitData.SplitIndices(len(namedExamples), details.splitFrac, - silent=not _verbose) - - trainExamples = [namedExamples[x] for x in trainIdx] - testExamples = [namedExamples[x] for x in testIdx] - else: - testExamples = [] - testIdx = [] - trainIdx = list(range(len(namedExamples))) - trainExamples = namedExamples - - if details.filterFrac != 0.0: - # if we're doing quantization on the fly, we need to handle that here: - if hasattr(details, 'activityBounds') and details.activityBounds: - tExamples = [] - bounds = details.activityBounds - for pt in trainExamples: - pt = pt[:] - act = pt[-1] - placed = 0 - bound = 0 - while not placed and bound < len(bounds): - if act < bounds[bound]: - pt[-1] = bound - placed = 1 - else: - bound += 1 - if not placed: - pt[-1] = bound - tExamples.append(pt) - else: - bounds = None - tExamples = trainExamples - trainIdx, temp = DataUtils.FilterData(tExamples, details.filterVal, details.filterFrac, -1, - indicesOnly=1) - tmp = [trainExamples[x] for x in trainIdx] - testExamples += [trainExamples[x] for x in temp] - trainExamples = tmp - - counts = DataUtils.CountResults(trainExamples, bounds=bounds) - ks = counts.keys() - ks.sort() - message('Result Counts in training set:') - for k in ks: - message(str((k, counts[k]))) - counts = DataUtils.CountResults(testExamples, bounds=bounds) - ks = counts.keys() - ks.sort() - message('Result Counts in test set:') - for k in ks: - message(str((k, counts[k]))) - nExamples = len(trainExamples) - message('Training with %d examples' % (nExamples)) - - nVars = data.GetNVars() - attrs = list(range(1, nVars + 1)) - nPossibleVals = data.GetNPossibleVals() - for i in range(1, len(nPossibleVals)): - if nPossibleVals[i - 1] == -1: - attrs.remove(i) - - if details.pickleDataFileName != '': - pickleDataFile = open(details.pickleDataFileName, 'wb+') - pickle.dump(trainExamples, pickleDataFile) - pickle.dump(testExamples, pickleDataFile) - pickleDataFile.close() - - if details.bayesModel: - composite = BayesComposite.BayesComposite() - else: - composite = Composite.Composite() - - composite._randomSeed = seed - composite._splitFrac = details.splitFrac - composite._shuffleActivities = details.shuffleActivities - composite._randomizeActivities = details.randomActivities - - if hasattr(details, 'filterFrac'): - composite._filterFrac = details.filterFrac - if hasattr(details, 'filterVal'): - composite._filterVal = details.filterVal - - composite.SetModelFilterData(details.modelFilterFrac, details.modelFilterVal) - - composite.SetActivityQuantBounds(details.activityBounds) - nPossibleVals = data.GetNPossibleVals() - if details.activityBounds: - nPossibleVals[-1] = len(details.activityBounds) + 1 - - if setDescNames: - composite.SetInputOrder(data.GetVarNames()) - composite.SetDescriptorNames(details._descNames) - else: - composite.SetDescriptorNames(data.GetVarNames()) - composite.SetActivityQuantBounds(details.activityBounds) - if details.nModels == 1: - details.internalHoldoutFrac = 0.0 - if details.useTrees: - from rdkit.ML.DecTree import CrossValidate, PruneTree - if details.qBounds != []: - from rdkit.ML.DecTree import BuildQuantTree - builder = BuildQuantTree.QuantTreeBoot - else: - from rdkit.ML.DecTree import ID3 - builder = ID3.ID3Boot - driver = CrossValidate.CrossValidationDriver - pruner = PruneTree.PruneTree - - composite.SetQuantBounds(details.qBounds) - nPossibleVals = data.GetNPossibleVals() - if details.activityBounds: - nPossibleVals[-1] = len(details.activityBounds) + 1 - composite.Grow(trainExamples, attrs, nPossibleVals=[0] + nPossibleVals, buildDriver=driver, - pruner=pruner, nTries=details.nModels, pruneIt=details.pruneIt, - lessGreedy=details.lessGreedy, needsQuantization=0, treeBuilder=builder, - nQuantBounds=details.qBounds, startAt=details.startAt, - maxDepth=details.limitDepth, progressCallback=progressCallback, - holdOutFrac=details.internalHoldoutFrac, - replacementSelection=details.replacementSelection, - recycleVars=details.recycleVars, randomDescriptors=details.randomDescriptors, - silent=not _verbose) - - elif details.useSigTrees: - from rdkit.ML.DecTree import BuildSigTree, CrossValidate - builder = BuildSigTree.SigTreeBuilder - driver = CrossValidate.CrossValidationDriver - nPossibleVals = data.GetNPossibleVals() - if details.activityBounds: - nPossibleVals[-1] = len(details.activityBounds) + 1 - if hasattr(details, 'sigTreeBiasList'): - biasList = details.sigTreeBiasList - else: - biasList = None - if hasattr(details, 'useCMIM'): - useCMIM = details.useCMIM - else: - useCMIM = 0 - if hasattr(details, 'allowCollections'): - allowCollections = details.allowCollections - else: - allowCollections = False - composite.Grow(trainExamples, attrs, nPossibleVals=[0] + nPossibleVals, buildDriver=driver, - nTries=details.nModels, needsQuantization=0, treeBuilder=builder, - maxDepth=details.limitDepth, progressCallback=progressCallback, - holdOutFrac=details.internalHoldoutFrac, - replacementSelection=details.replacementSelection, - recycleVars=details.recycleVars, randomDescriptors=details.randomDescriptors, - biasList=biasList, useCMIM=useCMIM, allowCollection=allowCollections, - silent=not _verbose) - - elif details.useKNN: - from rdkit.ML.KNN import CrossValidate, DistFunctions - - driver = CrossValidate.CrossValidationDriver - dfunc = '' - if (details.knnDistFunc == "Euclidean"): - dfunc = DistFunctions.EuclideanDist - elif (details.knnDistFunc == "Tanimoto"): - dfunc = DistFunctions.TanimotoDist - else: - assert 0, "Bad KNN distance metric value" - - composite.Grow(trainExamples, attrs, nPossibleVals=[0] + nPossibleVals, buildDriver=driver, - nTries=details.nModels, needsQuantization=0, numNeigh=details.knnNeighs, - holdOutFrac=details.internalHoldoutFrac, distFunc=dfunc) - - elif details.useNaiveBayes or details.useSigBayes: - from rdkit.ML.NaiveBayes import CrossValidate - driver = CrossValidate.CrossValidationDriver - if not (hasattr(details, 'useSigBayes') and details.useSigBayes): - composite.Grow(trainExamples, attrs, nPossibleVals=[0] + nPossibleVals, buildDriver=driver, - nTries=details.nModels, needsQuantization=0, nQuantBounds=details.qBounds, - holdOutFrac=details.internalHoldoutFrac, - replacementSelection=details.replacementSelection, - mEstimateVal=details.mEstimateVal, silent=not _verbose) - else: - if hasattr(details, 'useCMIM'): - useCMIM = details.useCMIM - else: - useCMIM = 0 - - composite.Grow(trainExamples, attrs, nPossibleVals=[0] + nPossibleVals, buildDriver=driver, - nTries=details.nModels, needsQuantization=0, nQuantBounds=details.qBounds, - mEstimateVal=details.mEstimateVal, useSigs=True, useCMIM=useCMIM, - holdOutFrac=details.internalHoldoutFrac, - replacementSelection=details.replacementSelection, silent=not _verbose) - - # # elif details.useSVM: - # # from rdkit.ML.SVM import CrossValidate - # # driver = CrossValidate.CrossValidationDriver - # # composite.Grow(trainExamples, attrs, nPossibleVals=[0]+nPossibleVals, - # # buildDriver=driver, nTries=details.nModels, - # # needsQuantization=0, - # # cost=details.svmCost,gamma=details.svmGamma, - # # weights=details.svmWeights,degree=details.svmDegree, - # # type=details.svmType,kernelType=details.svmKernel, - # # coef0=details.svmCoeff,eps=details.svmEps,nu=details.svmNu, - # # cache_size=details.svmCache,shrinking=details.svmShrink, - # # dataType=details.svmDataType, - # # holdOutFrac=details.internalHoldoutFrac, - # # replacementSelection=details.replacementSelection, - # # silent=not _verbose) - - else: - from rdkit.ML.Neural import CrossValidate - driver = CrossValidate.CrossValidationDriver - composite.Grow(trainExamples, attrs, [0] + nPossibleVals, nTries=details.nModels, - buildDriver=driver, needsQuantization=0) - - composite.AverageErrors() - composite.SortModels() - modelList, counts, avgErrs = composite.GetAllData() - counts = numpy.array(counts) - avgErrs = numpy.array(avgErrs) - composite._varNames = data.GetVarNames() - - for i in range(len(modelList)): - modelList[i].NameModel(composite._varNames) - - # do final statistics - weightedErrs = counts * avgErrs - averageErr = sum(weightedErrs) / sum(counts) - devs = (avgErrs - averageErr) - devs = devs * counts - devs = numpy.sqrt(devs * devs) - avgDev = sum(devs) / sum(counts) - message('# Overall Average Error: %%% 5.2f, Average Deviation: %%% 6.2f' % - (100. * averageErr, 100. * avgDev)) - - if details.bayesModel: - composite.Train(trainExamples, verbose=0) - - # blow out the saved examples and then save the composite: - composite.ClearModelExamples() - if saveIt: - composite.Pickle(details.outName) - details.model = DbModule.binaryHolder(pickle.dumps(composite)) - - badExamples = [] - if not details.detailedRes and (not hasattr(details, 'noScreen') or not details.noScreen): - if details.splitRun: - message('Testing all hold-out examples') - wrong = testall(composite, testExamples, badExamples) - message('%d examples (%% %5.2f) were misclassified' % - (len(wrong), 100. * float(len(wrong)) / float(len(testExamples)))) - _runDetails.holdout_error = float(len(wrong)) / len(testExamples) - else: - message('Testing all examples') - wrong = testall(composite, namedExamples, badExamples) - message('%d examples (%% %5.2f) were misclassified' % - (len(wrong), 100. * float(len(wrong)) / float(len(namedExamples)))) - _runDetails.overall_error = float(len(wrong)) / len(namedExamples) - - if details.detailedRes: - message('\nEntire data set:') - resTup = ScreenComposite.ShowVoteResults(range(data.GetNPts()), data, composite, - nPossibleVals[-1], details.threshold) - nGood, nBad, nSkip, avgGood, avgBad, avgSkip, voteTab = resTup - nPts = len(namedExamples) - nClass = nGood + nBad - _runDetails.overall_error = float(nBad) / nClass - _runDetails.overall_correct_conf = avgGood - _runDetails.overall_incorrect_conf = avgBad - _runDetails.overall_result_matrix = repr(voteTab) - nRej = nClass - nPts - if nRej > 0: - _runDetails.overall_fraction_dropped = float(nRej) / nPts - - if details.splitRun: - message('\nHold-out data:') - resTup = ScreenComposite.ShowVoteResults(range(len(testExamples)), testExamples, composite, - nPossibleVals[-1], details.threshold) - nGood, nBad, nSkip, avgGood, avgBad, avgSkip, voteTab = resTup - nPts = len(testExamples) - nClass = nGood + nBad - _runDetails.holdout_error = float(nBad) / nClass - _runDetails.holdout_correct_conf = avgGood - _runDetails.holdout_incorrect_conf = avgBad - _runDetails.holdout_result_matrix = repr(voteTab) - nRej = nClass - nPts - if nRej > 0: - _runDetails.holdout_fraction_dropped = float(nRej) / nPts - - if details.persistTblName and details.dbName: - message('Updating results table %s:%s' % (details.dbName, details.persistTblName)) - details.Store(db=details.dbName, table=details.persistTblName) - - if details.badName != '': - badFile = open(details.badName, 'w+') - for i in range(len(badExamples)): - ex = badExamples[i] - vote = wrong[i] - outStr = '%s\t%s\n' % (ex, vote) - badFile.write(outStr) - badFile.close() - - composite.ClearModelExamples() - return composite - - -def RunIt(details, progressCallback=None, saveIt=1, setDescNames=0): - """ does the actual work of building a composite model - - **Arguments** - - - details: a _CompositeRun.CompositeRun_ object containing details - (options, parameters, etc.) about the run - - - progressCallback: (optional) a function which is called with a single - argument (the number of models built so far) after each model is built. - - - saveIt: (optional) if this is nonzero, the resulting model will be pickled - and dumped to the filename specified in _details.outName_ - - - setDescNames: (optional) if nonzero, the composite's _SetInputOrder()_ method - will be called using the results of the data set's _GetVarNames()_ method; - it is assumed that the details object has a _descNames attribute which - is passed to the composites _SetDescriptorNames()_ method. Otherwise - (the default), _SetDescriptorNames()_ gets the results of _GetVarNames()_. - - **Returns** - - the composite model constructed - - - """ - details.rundate = time.asctime() - - fName = details.tableName.strip() - if details.outName == '': - details.outName = fName + '.pkl' - if not details.dbName: - if details.qBounds != []: - data = DataUtils.TextFileToData(fName) - else: - data = DataUtils.BuildQuantDataSet(fName) - elif details.useSigTrees or details.useSigBayes: - details.tableName = fName - data = details.GetDataSet(pickleCol=0, pickleClass=DataStructs.ExplicitBitVect) - elif details.qBounds != [] or not details.useTrees: - details.tableName = fName - data = details.GetDataSet() - else: - data = DataUtils.DBToQuantData( - details.dbName, # Function no longer defined - fName, - quantName=details.qTableName, - user=details.dbUser, - password=details.dbPassword) - - composite = RunOnData(details, data, progressCallback=progressCallback, saveIt=saveIt, - setDescNames=setDescNames) - return composite - - -def ShowVersion(includeArgs=0): - """ prints the version number - - """ - print('This is BuildComposite.py version %s' % (__VERSION_STRING)) - if includeArgs: - print('command line was:') - print(' '.join(sys.argv)) - - -def Usage(): - """ provides a list of arguments for when this is used from the command line - - """ - print(__doc__) - sys.exit(-1) - - -def SetDefaults(runDetails=None): - """ initializes a details object with default values - - **Arguments** - - - details: (optional) a _CompositeRun.CompositeRun_ object. - If this is not provided, the global _runDetails will be used. - - **Returns** - - the initialized _CompositeRun_ object. - - - """ - if runDetails is None: - runDetails = _runDetails - return CompositeRun.SetDefaults(runDetails) - - -def ParseArgs(runDetails): - """ parses command line arguments and updates _runDetails_ - - **Arguments** - - - runDetails: a _CompositeRun.CompositeRun_ object. - - """ - import getopt - args, extra = getopt.getopt( - sys.argv[1:], - 'P:o:n:p:b:sf:F:v:hlgd:rSTt:BQ:q:DVG:N:L:', - [ - 'nRuns=', - 'prune', - 'profile', - 'seed=', - 'noScreen', - 'modelFiltFrac=', - 'modelFiltVal=', - 'recycle', - 'randomDescriptors=', - 'doKnn', - 'knnK=', - 'knnTanimoto', - 'knnEuclid', - 'doSigTree', - 'allowCollections', - 'doNaiveBayes', - 'mEstimateVal=', - 'doSigBayes', - - # # 'doSVM','svmKernel=','svmType=','svmGamma=', - # # 'svmCost=','svmWeights=','svmDegree=', - # # 'svmCoeff=','svmEps=','svmNu=','svmCache=', - # # 'svmShrink','svmDataType=', - 'replacementSelection', - ]) - runDetails.profileIt = 0 - for arg, val in args: - if arg == '-n': - runDetails.nModels = int(val) - elif arg == '-N': - runDetails.note = val - elif arg == '-o': - runDetails.outName = val - elif arg == '-Q': - qBounds = eval(val) - assert type(qBounds) in [type([]), type( - ())], 'bad argument type for -Q, specify a list as a string' - runDetails.activityBounds = qBounds - runDetails.activityBoundsVals = val - elif arg == '-p': - runDetails.persistTblName = val - elif arg == '-P': - runDetails.pickleDataFileName = val - elif arg == '-r': - runDetails.randomActivities = 1 - elif arg == '-S': - runDetails.shuffleActivities = 1 - elif arg == '-b': - runDetails.badName = val - elif arg == '-B': - runDetails.bayesModels = 1 - elif arg == '-s': - runDetails.splitRun = 1 - elif arg == '-f': - runDetails.splitFrac = float(val) - elif arg == '-F': - runDetails.filterFrac = float(val) - elif arg == '-v': - runDetails.filterVal = float(val) - elif arg == '-l': - runDetails.lockRandom = 1 - elif arg == '-g': - runDetails.lessGreedy = 1 - elif arg == '-G': - runDetails.startAt = int(val) - elif arg == '-d': - runDetails.dbName = val - elif arg == '-T': - runDetails.useTrees = 0 - elif arg == '-t': - runDetails.threshold = float(val) - elif arg == '-D': - runDetails.detailedRes = 1 - elif arg == '-L': - runDetails.limitDepth = int(val) - elif arg == '-q': - qBounds = eval(val) - assert type(qBounds) in [type([]), type( - ())], 'bad argument type for -q, specify a list as a string' - runDetails.qBoundCount = val - runDetails.qBounds = qBounds - elif arg == '-V': - ShowVersion() - sys.exit(0) - elif arg == '--nRuns': - runDetails.nRuns = int(val) - elif arg == '--modelFiltFrac': - runDetails.modelFilterFrac = float(val) - elif arg == '--modelFiltVal': - runDetails.modelFilterVal = float(val) - elif arg == '--prune': - runDetails.pruneIt = 1 - elif arg == '--profile': - runDetails.profileIt = 1 - - elif arg == '--recycle': - runDetails.recycleVars = 1 - elif arg == '--randomDescriptors': - runDetails.randomDescriptors = int(val) - - elif arg == '--doKnn': - runDetails.useKNN = 1 - runDetails.useTrees = 0 - # # runDetails.useSVM=0 - runDetails.useNaiveBayes = 0 - elif arg == '--knnK': - runDetails.knnNeighs = int(val) - elif arg == '--knnTanimoto': - runDetails.knnDistFunc = "Tanimoto" - elif arg == '--knnEuclid': - runDetails.knnDistFunc = "Euclidean" - - elif arg == '--doSigTree': - # # runDetails.useSVM=0 - runDetails.useKNN = 0 - runDetails.useTrees = 0 - runDetails.useNaiveBayes = 0 - runDetails.useSigTrees = 1 - elif arg == '--allowCollections': - runDetails.allowCollections = True - - elif arg == '--doNaiveBayes': - runDetails.useNaiveBayes = 1 - # # runDetails.useSVM=0 - runDetails.useKNN = 0 - runDetails.useTrees = 0 - runDetails.useSigBayes = 0 - elif arg == '--doSigBayes': - runDetails.useSigBayes = 1 - runDetails.useNaiveBayes = 0 - # # runDetails.useSVM=0 - runDetails.useKNN = 0 - runDetails.useTrees = 0 - elif arg == '--mEstimateVal': - runDetails.mEstimateVal = float(val) - - -# # elif arg == '--doSVM': -# # runDetails.useSVM=1 -# # runDetails.useKNN=0 -# # runDetails.useTrees=0 -# # runDetails.useNaiveBayes=0 -# # elif arg == '--svmKernel': -# # if val not in SVM.kernels.keys(): -# # message('kernel %s not in list of available kernels:\n%s\n'%(val,SVM.kernels.keys())) -# # sys.exit(-1) -# # else: -# # runDetails.svmKernel=SVM.kernels[val] -# # elif arg == '--svmType': -# # if val not in SVM.machineTypes.keys(): -# # message('type %s not in list of available machines:\n%s\n'%(val, -# # SVM.machineTypes.keys())) -# # sys.exit(-1) -# # else: -# # runDetails.svmType=SVM.machineTypes[val] -# # elif arg == '--svmGamma': -# # runDetails.svmGamma = float(val) -# # elif arg == '--svmCost': -# # runDetails.svmCost = float(val) -# # elif arg == '--svmWeights': -# # # FIX: this is dangerous -# # runDetails.svmWeights = eval(val) -# # elif arg == '--svmDegree': -# # runDetails.svmDegree = int(val) -# # elif arg == '--svmCoeff': -# # runDetails.svmCoeff = float(val) -# # elif arg == '--svmEps': -# # runDetails.svmEps = float(val) -# # elif arg == '--svmNu': -# # runDetails.svmNu = float(val) -# # elif arg == '--svmCache': -# # runDetails.svmCache = int(val) -# # elif arg == '--svmShrink': -# # runDetails.svmShrink = 0 -# # elif arg == '--svmDataType': -# # runDetails.svmDataType=val - - elif arg == '--seed': - # FIX: dangerous - runDetails.randomSeed = eval(val) - - elif arg == '--noScreen': - runDetails.noScreen = 1 - - elif arg == '--replacementSelection': - runDetails.replacementSelection = 1 - - elif arg == '-h': - Usage() - - else: - Usage() - runDetails.tableName = extra[0] - -if __name__ == '__main__': - if len(sys.argv) < 2: - Usage() - - _runDetails.cmd = ' '.join(sys.argv) - SetDefaults(_runDetails) - ParseArgs(_runDetails) - - ShowVersion(includeArgs=1) - - if _runDetails.nRuns > 1: - for i in range(_runDetails.nRuns): - sys.stderr.write( - '---------------------------------\n\tDoing %d of %d\n---------------------------------\n' % - (i + 1, _runDetails.nRuns)) - RunIt(_runDetails) - else: - if _runDetails.profileIt: - try: - import hotshot - import hotshot.stats - prof = hotshot.Profile('prof.dat') - prof.runcall(RunIt, _runDetails) - stats = hotshot.stats.load('prof.dat') - stats.strip_dirs() - stats.sort_stats('time', 'calls') - stats.print_stats(30) - except ImportError: - print('Profiling requires the hotshot module') - else: - RunIt(_runDetails) diff --git a/rdkit/ML/Composite/AdjustComposite.py b/rdkit/ML/Composite/AdjustComposite.py deleted file mode 100644 index 0fb1836fbb7..00000000000 --- a/rdkit/ML/Composite/AdjustComposite.py +++ /dev/null @@ -1,89 +0,0 @@ -# $Id$ -# -# Copyright (C) 2003 greg Landrum and Rational Discovery LLC -# All Rights Reserved -# -""" functionality to allow adjusting composite model contents - -""" - -import copy - -import numpy - - -def BalanceComposite(model, set1, set2, weight, targetSize, names1=None, names2=None): - """ adjusts the contents of the composite model so as to maximize - the weighted classification accuracty across the two data sets. - - The resulting composite model, with _targetSize_ models, is returned. - - **Notes**: - - - if _names1_ and _names2_ are not provided, _set1_ and _set2_ should - have the same ordering of columns and _model_ should have already - have had _SetInputOrder()_ called. - - """ - # - # adjust the weights to be proportional to the size of the two data sets - # The normalization we do here assures that a perfect model contributes - # a score of S1+S2 to the final - # - S1 = len(set1) - S2 = len(set2) - weight1 = float(S1 + S2) * (1 - weight) / S1 - weight2 = float(S1 + S2) * weight / S2 - # print('\t:::', S1, S2, weight1, weight2) - # print('nModels:', len(model)) - # start with a copy so that we get all the additional schnick-schnack - res = copy.copy(model) - res.modelList = [] - res.errList = [] - res.countList = [] - res.quantizationRequirements = [] - - startSize = len(model) - scores = numpy.zeros(startSize, float) - actQuantBounds = model.GetActivityQuantBounds() - if names1 is not None: - model.SetInputOrder(names1) - for pt in set1: - pred, conf = model.ClassifyExample(pt) - if actQuantBounds: - ans = model.QuantizeActivity(pt)[-1] - else: - ans = pt[-1] - votes = model.GetVoteDetails() - for i in range(startSize): - if votes[i] == ans: - scores[i] += weight1 - if names2 is not None: - model.SetInputOrder(names2) - for pt in set2: - pred, conf = model.ClassifyExample(pt) - if actQuantBounds: - ans = model.QuantizeActivity(pt)[-1] - else: - ans = pt[-1] - votes = model.GetVoteDetails() - for i in range(startSize): - if votes[i] == ans: - scores[i] += weight2 - # normalize the scores - nPts = S1 + S2 - scores /= nPts - # sort them: - bestOrder = list(numpy.argsort(scores)) - bestOrder.reverse() - print('\tTAKE:', bestOrder[:targetSize]) - # and now take the best set: - for i in range(targetSize): - idx = bestOrder[i] - mdl = model.modelList[idx] - res.modelList.append(mdl) - res.errList.append(1. - scores[idx]) - res.countList.append(1) - # FIX: this should probably be more general: - res.quantizationRequirements.append(0) - return res diff --git a/rdkit/ML/Composite/BayesComposite.py b/rdkit/ML/Composite/BayesComposite.py deleted file mode 100644 index 3296e3e5924..00000000000 --- a/rdkit/ML/Composite/BayesComposite.py +++ /dev/null @@ -1,171 +0,0 @@ -# $Id$ -# -# Copyright (C) 2000-2008 greg Landrum and Rational Discovery LLC -# All Rights Reserved -# -""" code for dealing with Bayesian composite models - -For a model to be useable here, it should support the following API: - - - _ClassifyExample(example)_, returns a classification - -Other compatibility notes: - - 1) To use _Composite.Grow_ there must be some kind of builder - functionality which returns a 2-tuple containing (model,percent accuracy). - - 2) The models should be pickleable - - 3) It would be very happy if the models support the __cmp__ method so that - membership tests used to make sure models are unique work. - - - -""" - -import numpy - -from rdkit.ML.Composite import Composite - - -class BayesComposite(Composite.Composite): - """a composite model using Bayesian statistics in the Decision Proxy - - - **Notes** - - - typical usage: - - 1) grow the composite with AddModel until happy with it - - 2) call AverageErrors to calculate the average error values - - 3) call SortModels to put things in order by either error or count - - 4) call Train to update the Bayesian stats. - - """ - - def Train(self, data, verbose=0): - # FIX: this is wrong because it doesn't take the counts of each model into account - nModels = len(self) - nResults = self.nPossibleVals[-1] - self.resultProbs = numpy.zeros(nResults, float) - self.condProbs = [None] * nModels - - for i in range(nModels): - self.condProbs[i] = numpy.zeros((nResults, nResults), float) - # FIX: this is a quick hack which may slow things down a lot - for example in data: - act = self.QuantizeActivity(example)[-1] - self.resultProbs[int(act)] += 1 - - for example in data: - if self._mapOrder is not None: - example = self._RemapInput(example) - if self.GetActivityQuantBounds(): - example = self.QuantizeActivity(example) - if self.quantBounds is not None and 1 in self.quantizationRequirements: - quantExample = self.QuantizeExample(example, self.quantBounds) - else: - quantExample = [] - - trueRes = int(example[-1]) - - votes = self.CollectVotes(example, quantExample) - - for i in range(nModels): - self.condProbs[i][votes[i], trueRes] += 1 - - # self.condProbs /= self.resultProbs - for i in range(nModels): - for j in range(nResults): - self.condProbs[i][j] /= sum(self.condProbs[i][j]) - # self.condProbs[i] /= self.resultProbs - - self.resultProbs /= sum(self.resultProbs) - - if verbose: - print('**** Bayesian Results') - print('Result probabilities') - print('\t', self.resultProbs) - print('Model by model breakdown of conditional probs') - for mat in self.condProbs: - for row in mat: - print('\t', row) - print() - - def ClassifyExample(self, example, threshold=0, verbose=0, appendExample=0): - """ classifies the given example using the entire composite - - **Arguments** - - - example: the data to be classified - - - threshold: if this is a number greater than zero, then a - classification will only be returned if the confidence is - above _threshold_. Anything lower is returned as -1. - - **Returns** - - a (result,confidence) tuple - - """ - if self._mapOrder is not None: - example = self._RemapInput(example) - if self.GetActivityQuantBounds(): - example = self.QuantizeActivity(example) - if self.quantBounds is not None and 1 in self.quantizationRequirements: - quantExample = self.QuantizeExample(example, self.quantBounds) - else: - quantExample = [] - self.modelVotes = self.CollectVotes(example, quantExample, appendExample=appendExample) - - nPossibleRes = self.nPossibleVals[-1] - votes = [0.] * nPossibleRes - for i in range(len(self)): - predict = self.modelVotes[i] - for j in range(nPossibleRes): - votes[j] += self.condProbs[i][predict, j] - - # totVotes = sum(votes) - res = numpy.argmax(votes) - conf = votes[res] / len(self) - if verbose: - print(votes, conf, example[-1]) - if conf > threshold: - return res, conf - else: - return -1, conf - - def __init__(self): - Composite.Composite.__init__(self) - self.resultProbs = None - self.condProbs = None - - -def CompositeToBayesComposite(obj): - """ converts a Composite to a BayesComposite - - if _obj_ is already a BayesComposite or if it is not a _Composite.Composite_ , - nothing will be done. - - """ - if obj.__class__ == BayesComposite: - return - elif obj.__class__ == Composite.Composite: - obj.__class__ = BayesComposite - obj.resultProbs = None - obj.condProbs = None - - -def BayesCompositeToComposite(obj): - """ converts a BayesComposite to a Composite.Composite - - """ - if obj.__class__ == Composite.Composite: - return - elif obj.__class__ == BayesComposite: - obj.__class__ = Composite.Composite - obj.resultProbs = None - obj.condProbs = None diff --git a/rdkit/ML/Composite/Composite.py b/rdkit/ML/Composite/Composite.py deleted file mode 100755 index 8b7c1dda43c..00000000000 --- a/rdkit/ML/Composite/Composite.py +++ /dev/null @@ -1,725 +0,0 @@ -# $Id$ -# -# Copyright (C) 2000-2008 greg Landrum and Rational Discovery LLC -# All Rights Reserved -# -""" code for dealing with composite models - -For a model to be useable here, it should support the following API: - - - _ClassifyExample(example)_, returns a classification - -Other compatibility notes: - - 1) To use _Composite.Grow_ there must be some kind of builder - functionality which returns a 2-tuple containing (model,percent accuracy). - - 2) The models should be pickleable - - 3) It would be very happy if the models support the __cmp__ method so that - membership tests used to make sure models are unique work. - - - -""" - -import pickle - -import numpy - -from rdkit.ML.Data import DataUtils - - -class Composite(object): - """a composite model - - - **Notes** - - - adding a model which is already present just results in its count - field being incremented and the errors being averaged. - - - typical usage: - - 1) grow the composite with AddModel until happy with it - - 2) call AverageErrors to calculate the average error values - - 3) call SortModels to put things in order by either error or count - - - Composites can support individual models requiring either quantized or - nonquantized data. This is done by keeping a set of quantization bounds - (_QuantBounds_) in the composite and quantizing data passed in when required. - Quantization bounds can be set and interrogated using the - _Get/SetQuantBounds()_ methods. When models are added to the composite, - it can be indicated whether or not they require quantization. - - - Composites are also capable of extracting relevant variables from longer lists. - This is accessible using _SetDescriptorNames()_ to register the descriptors about - which the composite cares and _SetInputOrder()_ to tell the composite what the - ordering of input vectors will be. **Note** there is a limitation on this: each - model needs to take the same set of descriptors as inputs. This could be changed. - - """ - - def __init__(self): - self.modelList = [] - self.errList = [] - self.countList = [] - self.modelVotes = [] - self.quantBounds = None - self.nPossibleVals = None - self.quantizationRequirements = [] - self._descNames = [] - self._mapOrder = None - self.activityQuant = [] - - def SetModelFilterData(self, modelFilterFrac=0.0, modelFilterVal=0.0): - self._modelFilterFrac = modelFilterFrac - self._modelFilterVal = modelFilterVal - - def SetDescriptorNames(self, names): - """ registers the names of the descriptors this composite uses - - **Arguments** - - - names: a list of descriptor names (strings). - - **NOTE** - - the _names_ list is not - copied, so if you modify it later, the composite itself will also be modified. - - """ - self._descNames = names - - def GetDescriptorNames(self): - """ returns the names of the descriptors this composite uses - - """ - return self._descNames - - def SetQuantBounds(self, qBounds, nPossible=None): - """ sets the quantization bounds that the composite will use - - **Arguments** - - - qBounds: a list of quantization bounds, each quantbound is a - list of boundaries - - - nPossible: a list of integers indicating how many possible values - each descriptor can take on. - - **NOTE** - - - if the two lists are of different lengths, this will assert out - - - neither list is copied, so if you modify it later, the composite - itself will also be modified. - - """ - if nPossible is not None: - assert len(qBounds) == len(nPossible), 'qBounds/nPossible mismatch' - self.quantBounds = qBounds - self.nPossibleVals = nPossible - - def GetQuantBounds(self): - """ returns the quantization bounds - - **Returns** - - a 2-tuple consisting of: - - 1) the list of quantization bounds - - 2) the nPossibleVals list - - """ - return self.quantBounds, self.nPossibleVals - - def GetActivityQuantBounds(self): - if not hasattr(self, 'activityQuant'): - self.activityQuant = [] - return self.activityQuant - - def SetActivityQuantBounds(self, bounds): - self.activityQuant = bounds - - def QuantizeActivity(self, example, activityQuant=None, actCol=-1): - if activityQuant is None: - activityQuant = self.activityQuant - if activityQuant: - example = example[:] - act = example[actCol] - for box in range(len(activityQuant)): - if act < activityQuant[box]: - act = box - break - else: - act = box + 1 - example[actCol] = act - return example - - def QuantizeExample(self, example, quantBounds=None): - """ quantizes an example - - **Arguments** - - - example: a data point (list, tuple or numpy array) - - - quantBounds: a list of quantization bounds, each quantbound is a - list of boundaries. If this argument is not provided, the composite - will use its own quantBounds - - **Returns** - - the quantized example as a list - - **Notes** - - - If _example_ is different in length from _quantBounds_, this will - assert out. - - - This is primarily intended for internal use - - """ - if quantBounds is None: - quantBounds = self.quantBounds - assert len(example) == len(quantBounds), 'example/quantBounds mismatch' - quantExample = [None] * len(example) - for i in range(len(quantBounds)): - bounds = quantBounds[i] - p = example[i] - if len(bounds): - for box in range(len(bounds)): - if p < bounds[box]: - p = box - break - else: - p = box + 1 - else: - if i != 0: - p = int(p) - quantExample[i] = p - return quantExample - - def MakeHistogram(self): - """ creates a histogram of error/count pairs - - **Returns** - - the histogram as a series of (error, count) 2-tuples - - """ - nExamples = len(self.modelList) - histo = [] - i = 1 - lastErr = self.errList[0] - countHere = self.countList[0] - eps = 0.001 - while i < nExamples: - if self.errList[i] - lastErr > eps: - histo.append((lastErr, countHere)) - lastErr = self.errList[i] - countHere = self.countList[i] - else: - countHere = countHere + self.countList[i] - i = i + 1 - - return histo - - def CollectVotes(self, example, quantExample, appendExample=0, onlyModels=None): - """ collects votes across every member of the composite for the given example - - **Arguments** - - - example: the example to be voted upon - - - quantExample: the quantized form of the example - - - appendExample: toggles saving the example on the models - - - onlyModels: if provided, this should be a sequence of model - indices. Only the specified models will be used in the - prediction. - - **Returns** - - a list with a vote from each member - - """ - if not onlyModels: - onlyModels = list(range(len(self))) - - votes = [-1] * len(self) - for i in onlyModels: - if self.quantizationRequirements[i]: - votes[i] = int( - round(self.modelList[i].ClassifyExample(quantExample, appendExamples=appendExample))) - else: - votes[i] = int( - round(self.modelList[i].ClassifyExample(example, appendExamples=appendExample))) - - return votes - - def ClassifyExample(self, example, threshold=0, appendExample=0, onlyModels=None): - """ classifies the given example using the entire composite - - **Arguments** - - - example: the data to be classified - - - threshold: if this is a number greater than zero, then a - classification will only be returned if the confidence is - above _threshold_. Anything lower is returned as -1. - - - appendExample: toggles saving the example on the models - - - onlyModels: if provided, this should be a sequence of model - indices. Only the specified models will be used in the - prediction. - - **Returns** - - a (result,confidence) tuple - - - **FIX:** - statistics sucks... I'm not seeing an obvious way to get - the confidence intervals. For that matter, I'm not seeing - an unobvious way. - - For now, this is just treated as a voting problem with the confidence - measure being the percent of models which voted for the winning result. - - """ - if self._mapOrder is not None: - example = self._RemapInput(example) - if self.GetActivityQuantBounds(): - example = self.QuantizeActivity(example) - if self.quantBounds is not None and 1 in self.quantizationRequirements: - quantExample = self.QuantizeExample(example, self.quantBounds) - else: - quantExample = [] - - if not onlyModels: - onlyModels = list(range(len(self))) - self.modelVotes = self.CollectVotes(example, quantExample, appendExample=appendExample, - onlyModels=onlyModels) - - votes = [0] * self.nPossibleVals[-1] - for i in onlyModels: - res = self.modelVotes[i] - votes[res] = votes[res] + self.countList[i] - - totVotes = sum(votes) - res = numpy.argmax(votes) - conf = float(votes[res]) / float(totVotes) - if conf > threshold: - return res, conf - else: - return -1, conf - - def GetVoteDetails(self): - """ returns the votes from the last classification - - This will be _None_ if nothing has yet be classified - """ - return self.modelVotes - - def _RemapInput(self, inputVect): - """ remaps the input so that it matches the expected internal ordering - - **Arguments** - - - inputVect: the input to be reordered - - **Returns** - - - a list with the reordered (and possible shorter) data - - **Note** - - - you must call _SetDescriptorNames()_ and _SetInputOrder()_ for this to work - - - this is primarily intended for internal use - - """ - order = self._mapOrder - - if order is None: - return inputVect - remappedInput = [None] * len(order) - - for i in range(len(order) - 1): - remappedInput[i] = inputVect[order[i]] - if order[-1] == -1: - remappedInput[-1] = 0 - else: - remappedInput[-1] = inputVect[order[-1]] - return remappedInput - - def GetInputOrder(self): - """ returns the input order (used in remapping inputs) - - """ - return self._mapOrder - - def SetInputOrder(self, colNames): - """ sets the input order - - **Arguments** - - - colNames: a list of the names of the data columns that will be passed in - - **Note** - - - you must call _SetDescriptorNames()_ first for this to work - - - if the local descriptor names do not appear in _colNames_, this will - raise an _IndexError_ exception. - """ - if type(colNames) != list: - colNames = list(colNames) - descs = [x.upper() for x in self.GetDescriptorNames()] - self._mapOrder = [None] * len(descs) - colNames = [x.upper() for x in colNames] - - # FIX: I believe that we're safe assuming that field 0 - # is always the label, and therefore safe to ignore errors, - # but this may not be the case - try: - self._mapOrder[0] = colNames.index(descs[0]) - except ValueError: - self._mapOrder[0] = 0 - - for i in range(1, len(descs) - 1): - try: - self._mapOrder[i] = colNames.index(descs[i]) - except ValueError: - raise ValueError('cannot find descriptor name: %s in set %s' % - (repr(descs[i]), repr(colNames))) - try: - self._mapOrder[-1] = colNames.index(descs[-1]) - except ValueError: - # ok, there's no obvious match for the final column (activity) - # We'll take the last one: - # self._mapOrder[-1] = len(descs)-1 - self._mapOrder[-1] = -1 - - def Grow(self, examples, attrs, nPossibleVals, buildDriver, pruner=None, nTries=10, pruneIt=0, - needsQuantization=1, progressCallback=None, **buildArgs): - """ Grows the composite - - **Arguments** - - - examples: a list of examples to be used in training - - - attrs: a list of the variables to be used in training - - - nPossibleVals: this is used to provide a list of the number - of possible values for each variable. It is used if the - local quantBounds have not been set (for example for when you - are working with data which is already quantized). - - - buildDriver: the function to call to build the new models - - - pruner: a function used to "prune" (reduce the complexity of) - the resulting model. - - - nTries: the number of new models to add - - - pruneIt: toggles whether or not pruning is done - - - needsQuantization: used to indicate whether or not this type of model - requires quantized data - - - **buildArgs: all other keyword args are passed to _buildDriver_ - - **Note** - - - new models are *added* to the existing ones - - """ - silent = buildArgs.get('silent', 0) - buildArgs['silent'] = 1 - buildArgs['calcTotalError'] = 1 - - if self._mapOrder is not None: - examples = map(self._RemapInput, examples) - if self.GetActivityQuantBounds(): - for i in range(len(examples)): - examples[i] = self.QuantizeActivity(examples[i]) - nPossibleVals[-1] = len(self.GetActivityQuantBounds()) + 1 - if self.nPossibleVals is None: - self.nPossibleVals = nPossibleVals[:] - if needsQuantization: - trainExamples = [None] * len(examples) - nPossibleVals = self.nPossibleVals - for i in range(len(examples)): - trainExamples[i] = self.QuantizeExample(examples[i], self.quantBounds) - else: - trainExamples = examples - - for i in range(nTries): - trainSet = None - - if (hasattr(self, '_modelFilterFrac')) and (self._modelFilterFrac != 0): - trainIdx, _ = DataUtils.FilterData(trainExamples, self._modelFilterVal, - self._modelFilterFrac, -1, indicesOnly=1) - trainSet = [trainExamples[x] for x in trainIdx] - - else: - trainSet = trainExamples - - # print("Training model %i with %i out of %i examples"%(i, len(trainSet), len(trainExamples))) - model, frac = buildDriver(*(trainSet, attrs, nPossibleVals), **buildArgs) - if pruneIt: - model, frac2 = pruner(model, model.GetTrainingExamples(), model.GetTestExamples(), - minimizeTestErrorOnly=0) - frac = frac2 - if (hasattr(self, '_modelFilterFrac') and self._modelFilterFrac != 0 - and hasattr(model, '_trainIndices')): - # correct the model's training indices: - trainIndices = [trainIdx[x] for x in model._trainIndices] - model._trainIndices = trainIndices - - self.AddModel(model, frac, needsQuantization) - if not silent and (nTries < 10 or i % (nTries / 10) == 0): - print('Cycle: % 4d' % (i)) - if progressCallback is not None: - progressCallback(i) - - def ClearModelExamples(self): - for i in range(len(self)): - m = self.GetModel(i) - try: - m.ClearExamples() - except AttributeError: - pass - - def Pickle(self, fileName='foo.pkl', saveExamples=0): - """ Writes this composite off to a file so that it can be easily loaded later - - **Arguments** - - - fileName: the name of the file to be written - - - saveExamples: if this is zero, the individual models will have - their stored examples cleared. - - """ - if not saveExamples: - self.ClearModelExamples() - - pFile = open(fileName, 'wb+') - pickle.dump(self, pFile, 1) - pFile.close() - - def AddModel(self, model, error, needsQuantization=1): - """ Adds a model to the composite - - **Arguments** - - - model: the model to be added - - - error: the model's error - - - needsQuantization: a toggle to indicate whether or not this model - requires quantized inputs - - **NOTE** - - - this can be used as an alternative to _Grow()_ if you already have - some models constructed - - - the errList is run as an accumulator, - you probably want to call _AverageErrors_ after finishing the forest - - """ - if model in self.modelList: - try: - idx = self.modelList.index(model) - except ValueError: - # FIX: we should never get here, but sometimes we do anyway - self.modelList.append(model) - self.errList.append(error) - self.countList.append(1) - self.quantizationRequirements.append(needsQuantization) - else: - self.errList[idx] = self.errList[idx] + error - self.countList[idx] = self.countList[idx] + 1 - else: - self.modelList.append(model) - self.errList.append(error) - self.countList.append(1) - self.quantizationRequirements.append(needsQuantization) - - def AverageErrors(self): - """ convert local summed error to average error - - """ - self.errList = list(map(lambda x, y: x / y, self.errList, self.countList)) - - def SortModels(self, sortOnError=True): - """ sorts the list of models - - **Arguments** - - sortOnError: toggles sorting on the models' errors rather than their counts - - - """ - if sortOnError: - order = numpy.argsort(self.errList) - else: - order = numpy.argsort(self.countList) - - # these elaborate contortions are required because, at the time this - # code was written, Numeric arrays didn't unpickle so well... - # print(order,sortOnError,self.errList,self.countList) - self.modelList = [self.modelList[x] for x in order] - self.countList = [self.countList[x] for x in order] - self.errList = [self.errList[x] for x in order] - - def GetModel(self, i): - """ returns a particular model - - """ - return self.modelList[i] - - def SetModel(self, i, val): - """ replaces a particular model - - **Note** - - This is included for the sake of completeness, but you need to be - *very* careful when you use it. - - """ - self.modelList[i] = val - - def GetCount(self, i): - """ returns the count of the _i_th model - - """ - return self.countList[i] - - def SetCount(self, i, val): - """ sets the count of the _i_th model - - """ - self.countList[i] = val - - def GetError(self, i): - """ returns the error of the _i_th model - - """ - return self.errList[i] - - def SetError(self, i, val): - """ sets the error of the _i_th model - - """ - self.errList[i] = val - - def GetDataTuple(self, i): - """ returns all relevant data about a particular model - - **Arguments** - - i: an integer indicating which model should be returned - - **Returns** - - a 3-tuple consisting of: - - 1) the model - - 2) its count - - 3) its error - """ - return (self.modelList[i], self.countList[i], self.errList[i]) - - def SetDataTuple(self, i, tup): - """ sets all relevant data for a particular tree in the forest - - **Arguments** - - - i: an integer indicating which model should be returned - - - tup: a 3-tuple consisting of: - - 1) the model - - 2) its count - - 3) its error - - **Note** - - This is included for the sake of completeness, but you need to be - *very* careful when you use it. - - """ - self.modelList[i], self.countList[i], self.errList[i] = tup - - def GetAllData(self): - """ Returns everything we know - - **Returns** - - a 3-tuple consisting of: - - 1) our list of models - - 2) our list of model counts - - 3) our list of model errors - - """ - return (self.modelList, self.countList, self.errList) - - def __len__(self): - """ allows len(composite) to work - - """ - return len(self.modelList) - - def __getitem__(self, which): - """ allows composite[i] to work, returns the data tuple - - """ - return self.GetDataTuple(which) - - def __str__(self): - """ returns a string representation of the composite - - """ - outStr = 'Composite\n' - for i in range(len(self.modelList)): - outStr = (outStr + ' Model %4d: %5d occurrences %%%5.2f average error\n' % - (i, self.countList[i], 100. * self.errList[i])) - return outStr - - -if __name__ == '__main__': # pragma: nocover - if 0: - from rdkit.ML.DecTree import DecTree - c = Composite() - n = DecTree.DecTreeNode(None, 'foo') - c.AddModel(n, 0.5) - c.AddModel(n, 0.5) - c.AverageErrors() - c.SortModels() - print(c) - - qB = [[], [.5, 1, 1.5]] - exs = [['foo', 0], ['foo', .4], ['foo', .6], ['foo', 1.1], ['foo', 2.0]] - print('quantBounds:', qB) - for ex in exs: - q = c.QuantizeExample(ex, qB) - print(ex, q) - else: - pass diff --git a/rdkit/ML/Composite/UnitTestComposite.py b/rdkit/ML/Composite/UnitTestComposite.py deleted file mode 100644 index 269cc7dd73f..00000000000 --- a/rdkit/ML/Composite/UnitTestComposite.py +++ /dev/null @@ -1,203 +0,0 @@ -# $Id$ -# -# Copyright (C) 2001-2008 greg Landrum and Rational Discovery LLC -# All Rights Reserved -# -""" unit testing code for composite models - -""" -import io -import pickle -import unittest - -from rdkit import RDConfig -from rdkit.ML.Composite import Composite -from rdkit.ML.DecTree.DecTree import DecTreeNode as Node - - -class TestCase(unittest.TestCase): - - def setUp(self): - with open(RDConfig.RDCodeDir + '/ML/Composite/test_data/ferro.pkl', 'r') as pklTF: - buf = pklTF.read().replace('\r\n', '\n').encode('utf-8') - pklTF.close() - with io.BytesIO(buf) as pklF: - self.examples = pickle.load(pklF) - self.varNames = [ - 'composition', 'max_atomic', 'has3d', 'has4d', 'has5d', 'elconc', 'atvol', 'isferro' - ] - self.qBounds = [[], [1.89, 3.53], [], [], [], [0.55, 0.73], [11.81, 14.52], []] - self.nPoss = [0, 3, 2, 2, 2, 3, 3, 2] - self.attrs = list(range(1, len(self.varNames) - 1)) - from rdkit.ML.Data import DataUtils - DataUtils.InitRandomNumbers((23, 43)) - - def testQuantize(self): - # testing data quantization - qBounds = [[], [1, 2, 3]] - examples = [['foo', 0], ['foo', 1.5], ['foo', 5.5], ['foo', 2.5]] - answers = [['foo', 0], ['foo', 1], ['foo', 3], ['foo', 2]] - nPoss = [0, 4] - composite = Composite.Composite() - composite.SetQuantBounds(qBounds, nPoss) - for i in range(len(examples)): - qEx = composite.QuantizeExample(examples[i]) - self.assertEqual(qEx, answers[i]) - - def testTreeGrow(self): - # testing tree-based composite - with open(RDConfig.RDCodeDir + '/ML/Composite/test_data/composite_base.pkl', 'r') as pklTF: - buf = pklTF.read().replace('\r\n', '\n').encode('utf-8') - pklTF.close() - with io.BytesIO(buf) as pklF: - self.refCompos = pickle.load(pklF) - - composite = Composite.Composite() - composite._varNames = self.varNames - composite.SetQuantBounds(self.qBounds, self.nPoss) - from rdkit.ML.DecTree import CrossValidate - driver = CrossValidate.CrossValidationDriver - pruner = None - composite.Grow(self.examples, self.attrs, [], buildDriver=driver, pruner=pruner, nTries=100, - silent=1) - composite.AverageErrors() - composite.SortModels(sortOnError=False) - self.assertEqual(composite.countList, sorted(composite.countList)) - self.assertNotEqual(composite.errList, sorted(composite.errList)) - composite.SortModels() - self.assertNotEqual(composite.countList, sorted(composite.countList)) - self.assertEqual(composite.errList, sorted(composite.errList)) - - # with open(RDConfig.RDCodeDir+'/ML/Composite/test_data/composite_base.pkl','wb') as pklF: - # pickle.dump(composite,pklF) - - self.treeComposite = composite - self.assertEqual(len(composite), len(self.refCompos)) - for i in range(len(composite)): - t1, c1, e1 = composite[i] - t2, c2, e2 = self.refCompos[i] - self.assertEqual(e1, e2) - # we used to check for equality here, but since there are redundant errors, - # that's non-trivial. - # assert t1 == t2, 'tree mismatch' - # assert c1 == c2, 'count mismatch' - s = str(composite) - self.assertIn('Composite', s) - self.assertIn('Model', s) - self.assertIn('error', s) - - def testErrorEstimate(self): - # testing out-of-bag error estimates - - compos = Composite.Composite() - compos.SetQuantBounds([(0.5, ), (0.5, ), (0.5, ), []], [2, 2, 2, 2]) - compos.SetDescriptorNames(('D0', 'D1', 'D2', 'Act')) - compos.SetInputOrder(('ID', 'D0', 'D1', 'D2', 'Act')) - data = [['A', 0, 0, 0, 0], ['B', 1, 0, 0, 1], ['C', 0, 1, 0, 0], ['D', 1, 1, 1, 1]] - - # - # Build and validate three simple trees: - # - t1 = Node(None, 'D0', 0) - n = Node(t1, 'D1', 1) - t1.AddChildNode(n) - n.AddChildNode(Node(n, '0', 0, isTerminal=1)) - n.AddChildNode(Node(n, '1', 1, isTerminal=1)) - n = Node(t1, 'D2', 2) - t1.AddChildNode(n) - n.AddChildNode(Node(n, '1', 1, isTerminal=1)) - n.AddChildNode(Node(n, '0', 0, isTerminal=1)) - assert t1.ClassifyExample(data[0][1:]) == 0 - assert t1.ClassifyExample(data[1][1:]) == 1 - assert t1.ClassifyExample(data[2][1:]) == 1 - assert t1.ClassifyExample(data[3][1:]) == 0 - t1._trainIndices = (0, 1) - compos.AddModel(t1, .5) - - t2 = Node(None, 'D1', 1) - n = Node(t2, 'D0', 0) - t2.AddChildNode(n) - n.AddChildNode(Node(n, '0', 0, isTerminal=1)) - n.AddChildNode(Node(n, '1', 1, isTerminal=1)) - n = Node(t2, 'D2', 2) - t2.AddChildNode(n) - n.AddChildNode(Node(n, '0', 0, isTerminal=1)) - n.AddChildNode(Node(n, '1', 1, isTerminal=1)) - assert t2.ClassifyExample(data[0][1:]) == 0 - assert t2.ClassifyExample(data[1][1:]) == 1 - assert t2.ClassifyExample(data[2][1:]) == 0 - assert t2.ClassifyExample(data[3][1:]) == 1 - t2._trainIndices = (1, 2) - compos.AddModel(t2, 0.0) - - t3 = Node(None, 'D0', 0) - n = Node(t3, 'D2', 2) - t3.AddChildNode(n) - n.AddChildNode(Node(n, '0', 0, isTerminal=1)) - n.AddChildNode(Node(n, '1', 1, isTerminal=1)) - n = Node(t3, 'D1', 1) - t3.AddChildNode(n) - n.AddChildNode(Node(n, '0', 0, isTerminal=1)) - n.AddChildNode(Node(n, '1', 1, isTerminal=1)) - assert t3.ClassifyExample(data[0][1:]) == 0 - assert t3.ClassifyExample(data[1][1:]) == 0 - assert t3.ClassifyExample(data[2][1:]) == 0 - assert t3.ClassifyExample(data[3][1:]) == 1 - t3._trainIndices = (2, 3) - compos.AddModel(t3, 0.25) - - # - # validate the composite itself: - # - pred, conf = compos.ClassifyExample(data[0]) - assert pred == 0 - assert conf == 1.0 - pred, conf = compos.ClassifyExample(data[1]) - assert pred == 1 - assert conf == 2. / 3. - pred, conf = compos.ClassifyExample(data[2]) - assert pred == 0 - assert conf == 2. / 3. - pred, conf = compos.ClassifyExample(data[3]) - assert pred == 1 - assert conf == 2. / 3. - - self.assertEqual(compos.GetVoteDetails(), [0, 1, 1]) - self.assertEqual(compos.GetInputOrder(), [1, 2, 3, 4]) - - # - # now test the out-of-bag calculation: - # - pred, conf = compos.ClassifyExample(data[0], onlyModels=(1, 2)) - assert pred == 0 - assert conf == 1.0 - pred, conf = compos.ClassifyExample(data[1], onlyModels=(2, )) - assert pred == 0 - assert conf == 1.0 - pred, conf = compos.ClassifyExample(data[2], onlyModels=(0, )) - assert pred == 1 - assert conf == 1.0 - pred, conf = compos.ClassifyExample(data[3], onlyModels=(0, 1)) - assert pred == 0 - assert conf == 0.5 - - compos.ClearModelExamples() - - def test_exceptions(self): - compos = Composite.Composite() - compos.SetQuantBounds([(0.5, ), (0.5, ), (0.5, ), []], [2, 2, 2, 2]) - compos.SetDescriptorNames(('ID', 'D0', 'D1', 'D2', 'Act')) - compos.SetInputOrder(('ID', 'D2', 'D1', 'D0', 'Act')) - self.assertEqual(compos._mapOrder, [0, 3, 2, 1, 4]) - # Probes caught exception for ID - compos.SetInputOrder(('D2', 'D1', 'D0', 'Act')) - self.assertEqual(compos._mapOrder, [0, 2, 1, 0, 3]) - # Probes caught exception for Act - compos.SetInputOrder(('ID', 'D2', 'D1', 'D0')) - self.assertEqual(compos._mapOrder, [0, 3, 2, 1, -1]) - - self.assertRaises(ValueError, compos.SetInputOrder, ('Unknown', 'D0')) - - -if __name__ == '__main__': # pragma: nocover - unittest.main() diff --git a/rdkit/ML/Composite/__init__.py b/rdkit/ML/Composite/__init__.py deleted file mode 100644 index eeba8e62bbd..00000000000 --- a/rdkit/ML/Composite/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -from warnings import warn - -warn('This module is deprecated and will be removed in the 2024.03 release', DeprecationWarning, - stacklevel=2) diff --git a/rdkit/ML/Composite/test_data/composite_base.both.pkl b/rdkit/ML/Composite/test_data/composite_base.both.pkl deleted file mode 100644 index bc92765fbf2..00000000000 Binary files a/rdkit/ML/Composite/test_data/composite_base.both.pkl and /dev/null differ diff --git a/rdkit/ML/Composite/test_data/composite_base.net.pkl b/rdkit/ML/Composite/test_data/composite_base.net.pkl deleted file mode 100644 index a2a8ba60e9b..00000000000 --- a/rdkit/ML/Composite/test_data/composite_base.net.pkl +++ /dev/null @@ -1,2565 +0,0 @@ -ccopy_reg -_reconstructor -p1 -(crdkit.ML.Composite.Composite -Composite -p2 -c__builtin__ -object -p3 -NtRp4 -(dp5 -S'_mapOrder' -p6 -NsS'_descNames' -p7 -(lp8 -sS'modelVotes' -p9 -(lp10 -sS'activityQuant' -p11 -(lp12 -sS'modelList' -p13 -(lp14 -(irdkit.ML.DecTree.DecTree -DecTreeNode -p15 -(dp16 -S'_nResultCodes' -p17 -I2 -sS'name' -p18 -S'Var: 5' -p19 -sS'parent' -p20 -NsS'level' -p21 -I0 -sS'badExamples' -p22 -(lp23 -sS'label' -p24 -I5 -sS'terminalNode' -p25 -I0 -sS'trainingExamples' -p26 -(lp27 -(lp28 -S'CrPt3' -p29 -aI2 -aI1 -aI0 -aI1 -aI1 -aI2 -aI1 -aa(lp30 -S'FePt3' -p31 -aI2 -aI1 -aI0 -aI1 -aI1 -aI1 -aI1 -aa(lp32 -S'FeCo' -p33 -aI2 -aI1 -aI0 -aI0 -aI2 -aI0 -aI1 -aa(lp34 -S'AlFe3' -p35 -aI2 -aI1 -aI0 -aI0 -aI1 -aI1 -aI1 -aa(lp36 -S'Fe' -p37 -aI2 -aI1 -aI0 -aI0 -aI1 -aI1 -aI1 -aa(lp38 -S'HfV2' -p39 -aI1 -aI1 -aI0 -aI1 -aI0 -aI2 -aI0 -aa(lp40 -S'HfNi2' -p41 -aI1 -aI1 -aI0 -aI1 -aI0 -aI2 -aI0 -aa(lp42 -S'HfMo2' -p43 -aI1 -aI0 -aI1 -aI1 -aI0 -aI2 -aI0 -aa(lp44 -S'Cr2Zr' -p45 -aI2 -aI1 -aI1 -aI0 -aI0 -aI2 -aI0 -aa(lp46 -S'Cr2Ta' -p47 -aI2 -aI1 -aI0 -aI1 -aI0 -aI2 -aI0 -aa(lp48 -S'Cr2Nb' -p49 -aI2 -aI1 -aI1 -aI0 -aI0 -aI2 -aI0 -aa(lp50 -S'Co2Y' -p51 -aI1 -aI1 -aI1 -aI0 -aI0 -aI2 -aI0 -aa(lp52 -S'IrZr' -p53 -aI0 -aI0 -aI1 -aI1 -aI0 -aI2 -aI0 -aa(lp54 -S'IrSc' -p55 -aI0 -aI1 -aI0 -aI1 -aI0 -aI2 -aI0 -aa(lp56 -S'HfTc' -p57 -aI1 -aI0 -aI1 -aI1 -aI0 -aI2 -aI0 -aa(lp58 -S'Ir2Sc' -p59 -aI0 -aI1 -aI0 -aI1 -aI0 -aI2 -aI0 -aa(lp60 -S'Ir2Y' -p61 -aI0 -aI0 -aI1 -aI1 -aI0 -aI2 -aI0 -aa(lp62 -S'Ir2Zr' -p63 -aI0 -aI0 -aI1 -aI1 -aI0 -aI2 -aI0 -aa(lp64 -S'Ir' -p65 -aI0 -aI0 -aI0 -aI1 -aI1 -aI1 -aI0 -aa(lp66 -S'CoTi2' -p67 -aI1 -aI1 -aI0 -aI0 -aI0 -aI2 -aI0 -aa(lp68 -S'IrV3' -p69 -aI1 -aI1 -aI0 -aI1 -aI0 -aI1 -aI0 -aa(lp70 -S'Cr3Ru' -p71 -aI2 -aI1 -aI1 -aI0 -aI0 -aI1 -aI0 -aa(lp72 -S'Cr3Pt' -p73 -aI2 -aI1 -aI0 -aI1 -aI0 -aI1 -aI0 -aa(lp74 -S'CoV3' -p75 -aI1 -aI1 -aI0 -aI0 -aI0 -aI1 -aI0 -aa(lp76 -S'Cr3Os' -p77 -aI2 -aI1 -aI0 -aI1 -aI0 -aI1 -aI0 -aa(lp78 -S'Cr3Ir' -p79 -aI2 -aI1 -aI0 -aI1 -aI0 -aI1 -aI0 -aa(lp80 -S'W' -aI1 -aI0 -aI0 -aI1 -aI0 -aI2 -aI0 -aa(lp81 -S'Ta' -p82 -aI0 -aI0 -aI0 -aI1 -aI0 -aI2 -aI0 -aa(lp83 -S'Nb' -p84 -aI1 -aI0 -aI1 -aI0 -aI0 -aI2 -aI0 -aa(lp85 -S'Mo' -p86 -aI1 -aI0 -aI1 -aI0 -aI0 -aI2 -aI0 -aa(lp87 -S'HfPt' -p88 -aI0 -aI0 -aI0 -aI1 -aI0 -aI2 -aI0 -aa(lp89 -S'HfOs' -p90 -aI1 -aI0 -aI0 -aI1 -aI0 -aI2 -aI0 -aa(lp91 -S'Ir3V' -p92 -aI1 -aI1 -aI0 -aI1 -aI1 -aI1 -aI0 -aa(lp93 -S'Ir3Ti' -p94 -aI0 -aI1 -aI0 -aI1 -aI0 -aI1 -aI0 -aa(lp95 -S'Ir3Ta' -p96 -aI0 -aI0 -aI0 -aI1 -aI0 -aI2 -aI0 -aa(lp97 -S'HfRh3' -p98 -aI0 -aI0 -aI1 -aI1 -aI0 -aI2 -aI0 -aa(lp99 -S'HfIr3' -p100 -aI0 -aI0 -aI0 -aI1 -aI0 -aI2 -aI0 -aa(lp101 -S'Rh' -p102 -aI0 -aI0 -aI1 -aI0 -aI1 -aI1 -aI0 -aa(lp103 -S'Pd' -p104 -aI0 -aI0 -aI1 -aI0 -aI1 -aI2 -aI0 -aa(lp105 -S'CoTi' -p106 -aI1 -aI1 -aI0 -aI0 -aI0 -aI1 -aI0 -aa(lp107 -S'Cr2Hf' -p108 -aI2 -aI1 -aI0 -aI1 -aI0 -aI2 -aI0 -aasS'examples' -p109 -(lp110 -sS'_trainIndices' -p111 -(lp112 -I2 -aI5 -aI6 -aI7 -aI8 -aI10 -aI11 -aI12 -aI13 -aI15 -aI16 -aI18 -aI20 -aI22 -aI23 -aI24 -aI25 -aI26 -aI27 -aI29 -aI32 -aI35 -aI37 -aI38 -aI39 -aI40 -aI41 -aI43 -aI44 -aI45 -aI47 -aI49 -aI50 -aI51 -aI52 -aI54 -aI55 -aI56 -aI57 -aI59 -aI60 -asS'data' -p113 -F0.53494369909710671 -sS'children' -p114 -(lp115 -(irdkit.ML.DecTree.DecTree -DecTreeNode -p116 -(dp117 -g18 -S'0' -sg20 -Nsg21 -I1 -sg22 -(lp118 -sg24 -cnumpy.core.multiarray -scalar -p119 -(cnumpy -dtype -p120 -(S'i4' -I0 -I1 -tRp121 -(I3 -S'<' -NNNI-1 -I-1 -I0 -tbS'\x00\x00\x00\x00' -tRp122 -sg25 -I1 -sg26 -(lp123 -sg109 -(lp124 -sg113 -F0 -sg114 -(lp125 -sS'testExamples' -p126 -(lp127 -sba(irdkit.ML.DecTree.DecTree -DecTreeNode -p128 -(dp129 -g18 -S'Var: 1' -p130 -sg20 -Nsg21 -I1 -sg22 -(lp131 -sg24 -I1 -sg25 -I0 -sg26 -(lp132 -sg109 -(lp133 -sg113 -F1 -sg114 -(lp134 -(irdkit.ML.DecTree.DecTree -DecTreeNode -p135 -(dp136 -g18 -S'0' -sg20 -Nsg21 -I1 -sg22 -(lp137 -sg24 -g119 -(g121 -S'\x00\x00\x00\x00' -tRp138 -sg25 -I1 -sg26 -(lp139 -sg109 -(lp140 -sg113 -F0 -sg114 -(lp141 -sg126 -(lp142 -sba(irdkit.ML.DecTree.DecTree -DecTreeNode -p143 -(dp144 -g18 -S'0' -sg20 -Nsg21 -I1 -sg22 -(lp145 -sg24 -g119 -(g121 -S'\x00\x00\x00\x00' -tRp146 -sg25 -I1 -sg26 -(lp147 -sg109 -(lp148 -sg113 -F0 -sg114 -(lp149 -sg126 -(lp150 -sba(irdkit.ML.DecTree.DecTree -DecTreeNode -p151 -(dp152 -g18 -S'1' -sg20 -Nsg21 -I1 -sg22 -(lp153 -sg24 -g119 -(g121 -S'\x01\x00\x00\x00' -tRp154 -sg25 -I1 -sg26 -(lp155 -sg109 -(lp156 -sg113 -F0 -sg114 -(lp157 -sg126 -(lp158 -sbasg126 -(lp159 -sba(irdkit.ML.DecTree.DecTree -DecTreeNode -p160 -(dp161 -g18 -S'1' -sg20 -Nsg21 -I1 -sg22 -(lp162 -sg24 -g119 -(g121 -S'\x01\x00\x00\x00' -tRp163 -sg25 -I1 -sg26 -(lp164 -sg109 -(lp165 -sg113 -F0 -sg114 -(lp166 -sg126 -(lp167 -sbasg126 -(lp168 -(lp169 -S'Fe3Pd' -p170 -aI2 -aI1 -aI1 -aI0 -aI1 -aI1 -aI1 -aa(lp171 -S'Fe3Pt' -p172 -aI2 -aI1 -aI0 -aI1 -aI1 -aI1 -aI1 -aa(lp173 -S'Ni' -p174 -aI1 -aI1 -aI0 -aI0 -aI2 -aI0 -aI1 -aa(lp175 -S'Mn' -p176 -aI2 -aI1 -aI0 -aI0 -aI1 -aI1 -aI1 -aa(lp177 -S'FeNi3' -p178 -aI2 -aI1 -aI0 -aI0 -aI2 -aI0 -aI1 -aa(lp179 -S'Cr2Ti' -p180 -aI2 -aI1 -aI0 -aI0 -aI0 -aI2 -aI0 -aa(lp181 -S'HfW2' -p182 -aI1 -aI0 -aI0 -aI1 -aI0 -aI2 -aI0 -aa(lp183 -S'Co2Nb' -p184 -aI1 -aI1 -aI1 -aI0 -aI0 -aI2 -aI0 -aa(lp185 -S'IrY' -p186 -aI0 -aI0 -aI1 -aI1 -aI0 -aI2 -aI0 -aa(lp187 -S'CoZr' -p188 -aI1 -aI1 -aI1 -aI0 -aI0 -aI2 -aI0 -aa(lp189 -S'CoSc' -p190 -aI1 -aI1 -aI0 -aI0 -aI0 -aI2 -aI0 -aa(lp191 -S'CoHf' -p192 -aI1 -aI1 -aI0 -aI1 -aI0 -aI2 -aI0 -aa(lp193 -S'IrTi3' -p194 -aI0 -aI1 -aI0 -aI1 -aI0 -aI2 -aI0 -aa(lp195 -S'IrNb3' -p196 -aI1 -aI0 -aI1 -aI1 -aI0 -aI2 -aI0 -aa(lp197 -S'Cr3Rh' -p198 -aI2 -aI1 -aI1 -aI0 -aI0 -aI1 -aI0 -aa(lp199 -S'V' -aI1 -aI1 -aI0 -aI0 -aI0 -aI1 -aI0 -aa(lp200 -S'HfRu' -p201 -aI1 -aI0 -aI1 -aI1 -aI0 -aI2 -aI0 -aa(lp202 -S'Cr' -p203 -aI2 -aI1 -aI0 -aI0 -aI0 -aI1 -aI0 -aa(lp204 -S'Ir3Nb' -p205 -aI1 -aI0 -aI1 -aI1 -aI0 -aI2 -aI0 -aa(lp206 -S'Pt' -p207 -aI0 -aI0 -aI0 -aI1 -aI1 -aI2 -aI0 -aasba(irdkit.ML.DecTree.DecTree -DecTreeNode -p208 -(dp209 -g17 -I2 -sg18 -S'Var: 5' -p210 -sg20 -Nsg21 -I0 -sg22 -(lp211 -g91 -asg24 -I5 -sg25 -I0 -sg26 -(lp212 -g169 -ag173 -ag175 -ag30 -ag32 -ag36 -ag177 -ag38 -ag40 -ag42 -ag44 -ag179 -ag48 -ag181 -ag50 -ag183 -ag52 -ag185 -ag54 -ag56 -ag58 -ag60 -ag62 -ag187 -ag66 -ag189 -ag68 -ag193 -ag195 -ag197 -ag72 -ag74 -ag76 -ag80 -ag199 -ag83 -ag85 -ag87 -ag202 -ag89 -ag93 -ag95 -ag97 -ag99 -ag101 -ag103 -ag206 -asg109 -(lp213 -sg111 -(lp214 -I0 -aI3 -aI4 -aI5 -aI6 -aI8 -aI9 -aI10 -aI11 -aI12 -aI13 -aI14 -aI16 -aI17 -aI18 -aI19 -aI20 -aI21 -aI22 -aI23 -aI24 -aI25 -aI26 -aI28 -aI29 -aI30 -aI32 -aI33 -aI34 -aI36 -aI37 -aI38 -aI39 -aI41 -aI42 -aI44 -aI45 -aI47 -aI48 -aI49 -aI51 -aI52 -aI54 -aI55 -aI56 -aI57 -aI58 -asg113 -F0.60717165487130287 -sg114 -(lp215 -(irdkit.ML.DecTree.DecTree -DecTreeNode -p216 -(dp217 -g18 -S'0' -sg20 -Nsg21 -I1 -sg22 -(lp218 -sg24 -g119 -(g121 -S'\x00\x00\x00\x00' -tRp219 -sg25 -I1 -sg26 -(lp220 -sg109 -(lp221 -sg113 -F0 -sg114 -(lp222 -sg126 -(lp223 -sba(irdkit.ML.DecTree.DecTree -DecTreeNode -p224 -(dp225 -g18 -S'Var: 1' -p226 -sg20 -Nsg21 -I1 -sg22 -(lp227 -sg24 -I1 -sg25 -I0 -sg26 -(lp228 -sg109 -(lp229 -sg113 -F0.98522813603425152 -sg114 -(lp230 -(irdkit.ML.DecTree.DecTree -DecTreeNode -p231 -(dp232 -g18 -S'0' -sg20 -Nsg21 -I1 -sg22 -(lp233 -sg24 -g119 -(g121 -S'\x00\x00\x00\x00' -tRp234 -sg25 -I1 -sg26 -(lp235 -sg109 -(lp236 -sg113 -F0 -sg114 -(lp237 -sg126 -(lp238 -sba(irdkit.ML.DecTree.DecTree -DecTreeNode -p239 -(dp240 -g18 -S'1' -sg20 -g224 -sg21 -I1 -sg22 -(lp241 -sg24 -g119 -(g121 -S'\x01\x00\x00\x00' -tRp242 -sg25 -I1 -sg26 -(lp243 -sg109 -(lp244 -sg113 -F0 -sg114 -(lp245 -sg126 -(lp246 -sba(irdkit.ML.DecTree.DecTree -DecTreeNode -p247 -(dp248 -g18 -S'1' -sg20 -Nsg21 -I1 -sg22 -(lp249 -sg24 -g119 -(g121 -S'\x01\x00\x00\x00' -tRp250 -sg25 -I1 -sg26 -(lp251 -sg109 -(lp252 -sg113 -F0 -sg114 -(lp253 -sg126 -(lp254 -sbasg126 -(lp255 -sba(irdkit.ML.DecTree.DecTree -DecTreeNode -p256 -(dp257 -g18 -S'1' -sg20 -Nsg21 -I1 -sg22 -(lp258 -sg24 -g119 -(g121 -S'\x01\x00\x00\x00' -tRp259 -sg25 -I1 -sg26 -(lp260 -sg109 -(lp261 -sg113 -F0 -sg114 -(lp262 -sg126 -(lp263 -sbasg126 -(lp264 -g171 -ag28 -ag34 -ag46 -ag64 -ag191 -ag70 -ag78 -ag81 -ag200 -ag91 -ag204 -ag105 -ag107 -asba(irdkit.ML.DecTree.DecTree -DecTreeNode -p265 -(dp266 -g17 -I2 -sg18 -S'Var: 5' -p267 -sg20 -Nsg21 -I0 -sg22 -(lp268 -g173 -ag32 -ag177 -asg24 -I5 -sg25 -I0 -sg26 -(lp269 -g169 -ag28 -ag175 -ag34 -ag38 -ag40 -ag42 -ag44 -ag179 -ag48 -ag181 -ag50 -ag183 -ag52 -ag185 -ag54 -ag58 -ag60 -ag62 -ag64 -ag68 -ag193 -ag195 -ag70 -ag72 -ag74 -ag76 -ag199 -ag81 -ag85 -ag202 -ag89 -ag93 -ag204 -ag97 -ag101 -ag103 -ag206 -ag107 -asg109 -(lp270 -sg111 -(lp271 -I0 -aI2 -aI4 -aI7 -aI10 -aI11 -aI12 -aI13 -aI14 -aI16 -aI17 -aI18 -aI19 -aI20 -aI21 -aI22 -aI24 -aI25 -aI26 -aI27 -aI32 -aI33 -aI34 -aI35 -aI37 -aI38 -aI39 -aI42 -aI43 -aI45 -aI48 -aI49 -aI51 -aI53 -aI54 -aI56 -aI57 -aI58 -aI60 -asg113 -F0.47707130621932969 -sg114 -(lp272 -(irdkit.ML.DecTree.DecTree -DecTreeNode -p273 -(dp274 -g18 -S'0' -sg20 -Nsg21 -I1 -sg22 -(lp275 -sg24 -g119 -(g121 -S'\x00\x00\x00\x00' -tRp276 -sg25 -I1 -sg26 -(lp277 -sg109 -(lp278 -sg113 -F0 -sg114 -(lp279 -sg126 -(lp280 -sba(irdkit.ML.DecTree.DecTree -DecTreeNode -p281 -(dp282 -g18 -S'Var: 1' -p283 -sg20 -Nsg21 -I1 -sg22 -(lp284 -sg24 -I1 -sg25 -I0 -sg26 -(lp285 -sg109 -(lp286 -sg113 -F1 -sg114 -(lp287 -(irdkit.ML.DecTree.DecTree -DecTreeNode -p288 -(dp289 -g18 -S'0' -sg20 -Nsg21 -I1 -sg22 -(lp290 -sg24 -g119 -(g121 -S'\x00\x00\x00\x00' -tRp291 -sg25 -I1 -sg26 -(lp292 -sg109 -(lp293 -sg113 -F0 -sg114 -(lp294 -sg126 -(lp295 -sba(irdkit.ML.DecTree.DecTree -DecTreeNode -p296 -(dp297 -g18 -S'0' -sg20 -g281 -sg21 -I1 -sg22 -(lp298 -sg24 -g119 -(g121 -S'\x00\x00\x00\x00' -tRp299 -sg25 -I1 -sg26 -(lp300 -sg109 -(lp301 -sg113 -F0 -sg114 -(lp302 -sg126 -(lp303 -sba(irdkit.ML.DecTree.DecTree -DecTreeNode -p304 -(dp305 -g18 -S'1' -sg20 -Nsg21 -I1 -sg22 -(lp306 -sg24 -g119 -(g121 -S'\x01\x00\x00\x00' -tRp307 -sg25 -I1 -sg26 -(lp308 -sg109 -(lp309 -sg113 -F0 -sg114 -(lp310 -sg126 -(lp311 -sbasg126 -(lp312 -sba(irdkit.ML.DecTree.DecTree -DecTreeNode -p313 -(dp314 -g18 -S'Var: 1' -p315 -sg20 -Nsg21 -I1 -sg22 -(lp316 -sg24 -I1 -sg25 -I0 -sg26 -(lp317 -sg109 -(lp318 -sg113 -F0 -sg114 -(lp319 -(irdkit.ML.DecTree.DecTree -DecTreeNode -p320 -(dp321 -g18 -S'0' -sg20 -g313 -sg21 -I1 -sg22 -(lp322 -sg24 -g119 -(g121 -S'\x00\x00\x00\x00' -tRp323 -sg25 -I1 -sg26 -(lp324 -sg109 -(lp325 -sg113 -F0 -sg114 -(lp326 -sg126 -(lp327 -sba(irdkit.ML.DecTree.DecTree -DecTreeNode -p328 -(dp329 -g18 -S'0' -sg20 -g313 -sg21 -I1 -sg22 -(lp330 -sg24 -g119 -(g121 -S'\x00\x00\x00\x00' -tRp331 -sg25 -I1 -sg26 -(lp332 -sg109 -(lp333 -sg113 -F0 -sg114 -(lp334 -sg126 -(lp335 -sba(irdkit.ML.DecTree.DecTree -DecTreeNode -p336 -(dp337 -g18 -S'0' -sg20 -g313 -sg21 -I1 -sg22 -(lp338 -sg24 -g119 -(g121 -S'\x00\x00\x00\x00' -tRp339 -sg25 -I1 -sg26 -(lp340 -sg109 -(lp341 -sg113 -F0 -sg114 -(lp342 -sg126 -(lp343 -sbasg126 -(lp344 -sbasg126 -(lp345 -g171 -ag173 -ag30 -ag32 -ag36 -ag177 -ag46 -ag56 -ag187 -ag66 -ag189 -ag191 -ag197 -ag78 -ag80 -ag83 -ag200 -ag87 -ag91 -ag95 -ag99 -ag105 -asba(irdkit.ML.DecTree.DecTree -DecTreeNode -p346 -(dp347 -g17 -I2 -sg18 -S'Var: 5' -p348 -sg20 -Nsg21 -I0 -sg22 -(lp349 -g173 -ag32 -ag177 -ag91 -asg24 -I5 -sg25 -I0 -sg26 -(lp350 -g169 -ag171 -ag175 -ag30 -ag34 -ag36 -ag38 -ag40 -ag44 -ag179 -ag46 -ag181 -ag50 -ag52 -ag185 -ag56 -ag58 -ag62 -ag64 -ag189 -ag191 -ag193 -ag197 -ag74 -ag76 -ag78 -ag80 -ag81 -ag83 -ag85 -ag200 -ag202 -ag89 -ag93 -ag95 -ag204 -ag97 -ag99 -ag101 -ag103 -ag206 -asg109 -(lp351 -sg111 -(lp352 -I0 -aI1 -aI4 -aI5 -aI7 -aI8 -aI10 -aI11 -aI13 -aI14 -aI15 -aI17 -aI18 -aI20 -aI21 -aI23 -aI24 -aI26 -aI27 -aI30 -aI31 -aI33 -aI36 -aI38 -aI39 -aI40 -aI41 -aI43 -aI44 -aI45 -aI46 -aI48 -aI49 -aI51 -aI52 -aI53 -aI54 -aI55 -aI56 -aI57 -aI58 -asg113 -F0.60060857541318702 -sg114 -(lp353 -(irdkit.ML.DecTree.DecTree -DecTreeNode -p354 -(dp355 -g18 -S'0' -sg20 -Nsg21 -I1 -sg22 -(lp356 -sg24 -g119 -(g121 -S'\x00\x00\x00\x00' -tRp357 -sg25 -I1 -sg26 -(lp358 -sg109 -(lp359 -sg113 -F0 -sg114 -(lp360 -sg126 -(lp361 -sba(irdkit.ML.DecTree.DecTree -DecTreeNode -p362 -(dp363 -g18 -S'Var: 1' -p364 -sg20 -Nsg21 -I1 -sg22 -(lp365 -sg24 -I1 -sg25 -I0 -sg26 -(lp366 -sg109 -(lp367 -sg113 -F0.97095059445466858 -sg114 -(lp368 -(irdkit.ML.DecTree.DecTree -DecTreeNode -p369 -(dp370 -g18 -S'0' -sg20 -Nsg21 -I1 -sg22 -(lp371 -sg24 -g119 -(g121 -S'\x00\x00\x00\x00' -tRp372 -sg25 -I1 -sg26 -(lp373 -sg109 -(lp374 -sg113 -F0 -sg114 -(lp375 -sg126 -(lp376 -sba(irdkit.ML.DecTree.DecTree -DecTreeNode -p377 -(dp378 -g18 -S'1' -sg20 -g362 -sg21 -I1 -sg22 -(lp379 -sg24 -g119 -(g121 -S'\x01\x00\x00\x00' -tRp380 -sg25 -I1 -sg26 -(lp381 -sg109 -(lp382 -sg113 -F0 -sg114 -(lp383 -sg126 -(lp384 -sba(irdkit.ML.DecTree.DecTree -DecTreeNode -p385 -(dp386 -g18 -S'1' -sg20 -Nsg21 -I1 -sg22 -(lp387 -sg24 -g119 -(g121 -S'\x01\x00\x00\x00' -tRp388 -sg25 -I1 -sg26 -(lp389 -sg109 -(lp390 -sg113 -F0 -sg114 -(lp391 -sg126 -(lp392 -sbasg126 -(lp393 -sba(irdkit.ML.DecTree.DecTree -DecTreeNode -p394 -(dp395 -g18 -S'Var: 1' -p396 -sg20 -Nsg21 -I1 -sg22 -(lp397 -sg24 -I1 -sg25 -I0 -sg26 -(lp398 -sg109 -(lp399 -sg113 -F0 -sg114 -(lp400 -(irdkit.ML.DecTree.DecTree -DecTreeNode -p401 -(dp402 -g18 -S'0' -sg20 -g394 -sg21 -I1 -sg22 -(lp403 -sg24 -g119 -(g121 -S'\x00\x00\x00\x00' -tRp404 -sg25 -I1 -sg26 -(lp405 -sg109 -(lp406 -sg113 -F0 -sg114 -(lp407 -sg126 -(lp408 -sba(irdkit.ML.DecTree.DecTree -DecTreeNode -p409 -(dp410 -g18 -S'0' -sg20 -g394 -sg21 -I1 -sg22 -(lp411 -sg24 -g119 -(g121 -S'\x00\x00\x00\x00' -tRp412 -sg25 -I1 -sg26 -(lp413 -sg109 -(lp414 -sg113 -F0 -sg114 -(lp415 -sg126 -(lp416 -sba(irdkit.ML.DecTree.DecTree -DecTreeNode -p417 -(dp418 -g18 -S'0' -sg20 -g394 -sg21 -I1 -sg22 -(lp419 -sg24 -g119 -(g121 -S'\x00\x00\x00\x00' -tRp420 -sg25 -I1 -sg26 -(lp421 -sg109 -(lp422 -sg113 -F0 -sg114 -(lp423 -sg126 -(lp424 -sbasg126 -(lp425 -sbasg126 -(lp426 -g28 -ag173 -ag32 -ag177 -ag42 -ag48 -ag183 -ag54 -ag60 -ag187 -ag66 -ag68 -ag195 -ag70 -ag72 -ag199 -ag87 -ag91 -ag105 -ag107 -asba(irdkit.ML.DecTree.DecTree -DecTreeNode -p427 -(dp428 -g17 -I2 -sg18 -S'Var: 5' -p429 -sg20 -Nsg21 -I0 -sg22 -(lp430 -g64 -ag101 -ag103 -ag206 -asg24 -I5 -sg25 -I0 -sg26 -(lp431 -g171 -ag28 -ag173 -ag175 -ag30 -ag34 -ag36 -ag177 -ag42 -ag44 -ag46 -ag181 -ag56 -ag58 -ag60 -ag62 -ag187 -ag66 -ag189 -ag193 -ag70 -ag72 -ag74 -ag76 -ag78 -ag199 -ag85 -ag87 -ag202 -ag91 -ag95 -ag204 -ag97 -ag105 -asg109 -(lp432 -sg111 -(lp433 -I1 -aI2 -aI3 -aI4 -aI5 -aI7 -aI8 -aI9 -aI12 -aI13 -aI15 -aI17 -aI23 -aI24 -aI25 -aI26 -aI28 -aI29 -aI30 -aI33 -aI35 -aI37 -aI38 -aI39 -aI40 -aI42 -aI45 -aI47 -aI48 -aI50 -aI52 -aI53 -aI54 -aI59 -asg113 -F0.78712658620126896 -sg114 -(lp434 -(irdkit.ML.DecTree.DecTree -DecTreeNode -p435 -(dp436 -g18 -S'0' -sg20 -Nsg21 -I1 -sg22 -(lp437 -sg24 -g119 -(g121 -S'\x00\x00\x00\x00' -tRp438 -sg25 -I1 -sg26 -(lp439 -sg109 -(lp440 -sg113 -F0 -sg114 -(lp441 -sg126 -(lp442 -sba(irdkit.ML.DecTree.DecTree -DecTreeNode -p443 -(dp444 -g18 -S'Var: 1' -p445 -sg20 -Nsg21 -I1 -sg22 -(lp446 -sg24 -I1 -sg25 -I0 -sg26 -(lp447 -sg109 -(lp448 -sg113 -F0.59167277858232747 -sg114 -(lp449 -(irdkit.ML.DecTree.DecTree -DecTreeNode -p450 -(dp451 -g18 -S'1' -sg20 -g443 -sg21 -I1 -sg22 -(lp452 -sg24 -g119 -(g121 -S'\x01\x00\x00\x00' -tRp453 -sg25 -I1 -sg26 -(lp454 -sg109 -(lp455 -sg113 -F0 -sg114 -(lp456 -sg126 -(lp457 -sba(irdkit.ML.DecTree.DecTree -DecTreeNode -p458 -(dp459 -g18 -S'0' -sg20 -Nsg21 -I1 -sg22 -(lp460 -sg24 -g119 -(g121 -S'\x00\x00\x00\x00' -tRp461 -sg25 -I1 -sg26 -(lp462 -sg109 -(lp463 -sg113 -F0 -sg114 -(lp464 -sg126 -(lp465 -sba(irdkit.ML.DecTree.DecTree -DecTreeNode -p466 -(dp467 -g18 -S'1' -sg20 -Nsg21 -I1 -sg22 -(lp468 -sg24 -g119 -(g121 -S'\x01\x00\x00\x00' -tRp469 -sg25 -I1 -sg26 -(lp470 -sg109 -(lp471 -sg113 -F0 -sg114 -(lp472 -sg126 -(lp473 -sbasg126 -(lp474 -sba(irdkit.ML.DecTree.DecTree -DecTreeNode -p475 -(dp476 -g18 -S'1' -sg20 -Nsg21 -I1 -sg22 -(lp477 -sg24 -g119 -(g121 -S'\x01\x00\x00\x00' -tRp478 -sg25 -I1 -sg26 -(lp479 -sg109 -(lp480 -sg113 -F0 -sg114 -(lp481 -sg126 -(lp482 -sbasg126 -(lp483 -g169 -ag32 -ag38 -ag40 -ag179 -ag48 -ag50 -ag183 -ag52 -ag185 -ag54 -ag64 -ag191 -ag68 -ag195 -ag197 -ag80 -ag81 -ag83 -ag200 -ag89 -ag93 -ag99 -ag101 -ag103 -ag206 -ag107 -asba(irdkit.ML.DecTree.DecTree -DecTreeNode -p484 -(dp485 -g17 -I2 -sg18 -S'Var: 5' -p486 -sg20 -Nsg21 -I0 -sg22 -(lp487 -g64 -ag91 -ag101 -ag103 -ag206 -asg24 -I5 -sg25 -I0 -sg26 -(lp488 -g169 -ag171 -ag28 -ag173 -ag175 -ag34 -ag177 -ag42 -ag44 -ag46 -ag48 -ag181 -ag50 -ag183 -ag54 -ag58 -ag62 -ag187 -ag66 -ag189 -ag191 -ag68 -ag193 -ag195 -ag70 -ag197 -ag81 -ag83 -ag200 -ag87 -ag202 -ag89 -ag93 -ag95 -ag204 -ag97 -ag99 -ag105 -ag107 -asg109 -(lp489 -sg111 -(lp490 -I0 -aI1 -aI2 -aI3 -aI4 -aI7 -aI9 -aI12 -aI13 -aI15 -aI16 -aI17 -aI18 -aI19 -aI22 -aI24 -aI26 -aI28 -aI29 -aI30 -aI31 -aI32 -aI33 -aI34 -aI35 -aI36 -aI43 -aI44 -aI46 -aI47 -aI48 -aI49 -aI51 -aI52 -aI53 -aI54 -aI55 -aI59 -aI60 -asg113 -F0.67895389951857588 -sg114 -(lp491 -(irdkit.ML.DecTree.DecTree -DecTreeNode -p492 -(dp493 -g18 -S'0' -sg20 -Nsg21 -I1 -sg22 -(lp494 -sg24 -g119 -(g121 -S'\x00\x00\x00\x00' -tRp495 -sg25 -I1 -sg26 -(lp496 -sg109 -(lp497 -sg113 -F0 -sg114 -(lp498 -sg126 -(lp499 -sba(irdkit.ML.DecTree.DecTree -DecTreeNode -p500 -(dp501 -g18 -S'1' -sg20 -Nsg21 -I1 -sg22 -(lp502 -sg24 -g119 -(g121 -S'\x01\x00\x00\x00' -tRp503 -sg25 -I1 -sg26 -(lp504 -sg109 -(lp505 -sg113 -F0 -sg114 -(lp506 -sg126 -(lp507 -sba(irdkit.ML.DecTree.DecTree -DecTreeNode -p508 -(dp509 -g18 -S'1' -sg20 -Nsg21 -I1 -sg22 -(lp510 -sg24 -g119 -(g121 -S'\x01\x00\x00\x00' -tRp511 -sg25 -I1 -sg26 -(lp512 -sg109 -(lp513 -sg113 -F0 -sg114 -(lp514 -sg126 -(lp515 -sbasg126 -(lp516 -g30 -ag32 -ag36 -ag38 -ag40 -ag179 -ag52 -ag185 -ag56 -ag60 -ag64 -ag72 -ag74 -ag76 -ag78 -ag80 -ag199 -ag85 -ag91 -ag101 -ag103 -ag206 -asbasS'nPossibleVals' -p517 -(lp518 -I0 -aI3 -aI2 -aI2 -aI2 -aI3 -aI3 -aI2 -asS'_varNames' -p519 -(lp520 -S'composition' -p521 -aS'max_atomic' -p522 -aS'has3d' -p523 -aS'has4d' -p524 -aS'has5d' -p525 -aS'elconc' -p526 -aS'atvol' -p527 -aS'isferro' -p528 -asS'quantBounds' -p529 -(lp530 -(lp531 -a(lp532 -F1.8899999999999999 -aF3.5299999999999998 -aa(lp533 -a(lp534 -a(lp535 -a(lp536 -F0.55000000000000004 -aF0.72999999999999998 -aa(lp537 -F11.81 -aF14.52 -aa(lp538 -asS'quantizationRequirements' -p539 -(lp540 -I1 -aI1 -aI1 -aI1 -aI1 -aI1 -asS'countList' -p541 -(lp542 -I71 -aI25 -aI1 -aI1 -aI1 -aI1 -asS'errList' -p543 -(lp544 -F0 -aF0.016393442622950824 -aF0.049180327868852458 -aF0.065573770491803282 -aF0.065573770491803282 -aF0.081967213114754092 -asb. diff --git a/rdkit/ML/Composite/test_data/composite_base.neural.pkl b/rdkit/ML/Composite/test_data/composite_base.neural.pkl deleted file mode 100644 index 87a708e6026..00000000000 Binary files a/rdkit/ML/Composite/test_data/composite_base.neural.pkl and /dev/null differ diff --git a/rdkit/ML/Composite/test_data/composite_base.others.both.pkl b/rdkit/ML/Composite/test_data/composite_base.others.both.pkl deleted file mode 100644 index 8ee7517860e..00000000000 Binary files a/rdkit/ML/Composite/test_data/composite_base.others.both.pkl and /dev/null differ diff --git a/rdkit/ML/Composite/test_data/composite_base.others.net.pkl b/rdkit/ML/Composite/test_data/composite_base.others.net.pkl deleted file mode 100644 index a3dd1830128..00000000000 Binary files a/rdkit/ML/Composite/test_data/composite_base.others.net.pkl and /dev/null differ diff --git a/rdkit/ML/Composite/test_data/composite_base.others.neural.pkl b/rdkit/ML/Composite/test_data/composite_base.others.neural.pkl deleted file mode 100644 index 0a764d93f10..00000000000 Binary files a/rdkit/ML/Composite/test_data/composite_base.others.neural.pkl and /dev/null differ diff --git a/rdkit/ML/Composite/test_data/composite_base.others.pkl b/rdkit/ML/Composite/test_data/composite_base.others.pkl deleted file mode 100644 index 63710d2743e..00000000000 Binary files a/rdkit/ML/Composite/test_data/composite_base.others.pkl and /dev/null differ diff --git a/rdkit/ML/Composite/test_data/composite_base.pkl b/rdkit/ML/Composite/test_data/composite_base.pkl deleted file mode 100644 index 864940dea1d..00000000000 --- a/rdkit/ML/Composite/test_data/composite_base.pkl +++ /dev/null @@ -1,2269 +0,0 @@ -ccopy_reg -_reconstructor -p0 -(crdkit.ML.Composite.Composite -Composite -p1 -c__builtin__ -object -p2 -Ntp3 -Rp4 -(dp5 -S'_mapOrder' -p6 -NsS'_descNames' -p7 -(lp8 -sS'modelVotes' -p9 -(lp10 -sS'activityQuant' -p11 -(lp12 -sS'modelList' -p13 -(lp14 -(irdkit.ML.DecTree.DecTree -DecTreeNode -p15 -(dp16 -S'_nResultCodes' -p17 -I2 -sS'name' -p18 -S'Var: 5' -p19 -sS'parent' -p20 -NsS'level' -p21 -I0 -sS'badExamples' -p22 -(lp23 -sS'label' -p24 -I5 -sS'terminalNode' -p25 -I0 -sS'trainingExamples' -p26 -(lp27 -(lp28 -S'Fe3Pd' -p29 -aI2 -aI1 -aI1 -aI0 -aI1 -aI1 -aI1 -aa(lp30 -S'Fe3Pt' -p31 -aI2 -aI1 -aI0 -aI1 -aI1 -aI1 -aI1 -aa(lp32 -S'CrPt3' -p33 -aI2 -aI1 -aI0 -aI1 -aI1 -aI2 -aI1 -aa(lp34 -S'Mn' -p35 -aI2 -aI1 -aI0 -aI0 -aI1 -aI1 -aI1 -aa(lp36 -S'FeCo' -p37 -aI2 -aI1 -aI0 -aI0 -aI2 -aI0 -aI1 -aa(lp38 -S'Fe' -p39 -aI2 -aI1 -aI0 -aI0 -aI1 -aI1 -aI1 -aa(lp40 -S'FeNi3' -p41 -aI2 -aI1 -aI0 -aI0 -aI2 -aI0 -aI1 -aa(lp42 -S'HfV2' -p43 -aI1 -aI1 -aI0 -aI1 -aI0 -aI2 -aI0 -aa(lp44 -S'HfNi2' -p45 -aI1 -aI1 -aI0 -aI1 -aI0 -aI2 -aI0 -aa(lp46 -S'HfMo2' -p47 -aI1 -aI0 -aI1 -aI1 -aI0 -aI2 -aI0 -aa(lp48 -S'Cr2Zr' -p49 -aI2 -aI1 -aI1 -aI0 -aI0 -aI2 -aI0 -aa(lp50 -S'Cr2Ti' -p51 -aI2 -aI1 -aI0 -aI0 -aI0 -aI2 -aI0 -aa(lp52 -S'Cr2Ta' -p53 -aI2 -aI1 -aI0 -aI1 -aI0 -aI2 -aI0 -aa(lp54 -S'Cr2Nb' -p55 -aI2 -aI1 -aI1 -aI0 -aI0 -aI2 -aI0 -aa(lp56 -S'HfW2' -p57 -aI1 -aI0 -aI0 -aI1 -aI0 -aI2 -aI0 -aa(lp58 -S'Co2Y' -p59 -aI1 -aI1 -aI1 -aI0 -aI0 -aI2 -aI0 -aa(lp60 -S'IrY' -p61 -aI0 -aI0 -aI1 -aI1 -aI0 -aI2 -aI0 -aa(lp62 -S'HfTc' -p63 -aI1 -aI0 -aI1 -aI1 -aI0 -aI2 -aI0 -aa(lp64 -S'Ir2Sc' -p65 -aI0 -aI1 -aI0 -aI1 -aI0 -aI2 -aI0 -aa(lp66 -S'Ir' -p67 -aI0 -aI0 -aI0 -aI1 -aI1 -aI1 -aI0 -aa(lp68 -S'CoTi2' -p69 -aI1 -aI1 -aI0 -aI0 -aI0 -aI2 -aI0 -aa(lp70 -S'CoSc' -p71 -aI1 -aI1 -aI0 -aI0 -aI0 -aI2 -aI0 -aa(lp72 -S'IrV3' -p73 -aI1 -aI1 -aI0 -aI1 -aI0 -aI1 -aI0 -aa(lp74 -S'IrTi3' -p75 -aI0 -aI1 -aI0 -aI1 -aI0 -aI2 -aI0 -aa(lp76 -S'IrNb3' -p77 -aI1 -aI0 -aI1 -aI1 -aI0 -aI2 -aI0 -aa(lp78 -S'Cr3Ru' -p79 -aI2 -aI1 -aI1 -aI0 -aI0 -aI1 -aI0 -aa(lp80 -S'Cr3Rh' -p81 -aI2 -aI1 -aI1 -aI0 -aI0 -aI1 -aI0 -aa(lp82 -S'Cr3Pt' -p83 -aI2 -aI1 -aI0 -aI1 -aI0 -aI1 -aI0 -aa(lp84 -S'CoV3' -p85 -aI1 -aI1 -aI0 -aI0 -aI0 -aI1 -aI0 -aa(lp86 -S'Cr3Os' -p87 -aI2 -aI1 -aI0 -aI1 -aI0 -aI1 -aI0 -aa(lp88 -S'Cr3Ir' -p89 -aI2 -aI1 -aI0 -aI1 -aI0 -aI1 -aI0 -aa(lp90 -S'W' -p91 -aI1 -aI0 -aI0 -aI1 -aI0 -aI2 -aI0 -aa(lp92 -S'V' -p93 -aI1 -aI1 -aI0 -aI0 -aI0 -aI1 -aI0 -aa(lp94 -S'Ta' -p95 -aI0 -aI0 -aI0 -aI1 -aI0 -aI2 -aI0 -aa(lp96 -S'HfRu' -p97 -aI1 -aI0 -aI1 -aI1 -aI0 -aI2 -aI0 -aa(lp98 -S'HfPt' -p99 -aI0 -aI0 -aI0 -aI1 -aI0 -aI2 -aI0 -aa(lp100 -S'Cr' -p101 -aI2 -aI1 -aI0 -aI0 -aI0 -aI1 -aI0 -aa(lp102 -S'HfOs' -p103 -aI1 -aI0 -aI0 -aI1 -aI0 -aI2 -aI0 -aa(lp104 -S'Ir3V' -p105 -aI1 -aI1 -aI0 -aI1 -aI1 -aI1 -aI0 -aa(lp106 -S'Ir3Ta' -p107 -aI0 -aI0 -aI0 -aI1 -aI0 -aI2 -aI0 -aa(lp108 -S'Ir3Nb' -p109 -aI1 -aI0 -aI1 -aI1 -aI0 -aI2 -aI0 -aa(lp110 -S'HfRh3' -p111 -aI0 -aI0 -aI1 -aI1 -aI0 -aI2 -aI0 -aa(lp112 -S'HfIr3' -p113 -aI0 -aI0 -aI0 -aI1 -aI0 -aI2 -aI0 -aa(lp114 -S'Pd' -p115 -aI0 -aI0 -aI1 -aI0 -aI1 -aI2 -aI0 -aa(lp116 -S'CoTi' -p117 -aI1 -aI1 -aI0 -aI0 -aI0 -aI1 -aI0 -aa(lp118 -S'Cr2Hf' -p119 -aI2 -aI1 -aI0 -aI1 -aI0 -aI2 -aI0 -aasS'examples' -p120 -(lp121 -sS'_trainIndices' -p122 -(lp123 -I0 -aI1 -aI2 -aI4 -aI6 -aI8 -aI9 -aI10 -aI11 -aI12 -aI13 -aI14 -aI15 -aI16 -aI17 -aI18 -aI21 -aI23 -aI24 -aI27 -aI29 -aI30 -aI32 -aI33 -aI34 -aI35 -aI36 -aI37 -aI38 -aI39 -aI40 -aI41 -aI42 -aI43 -aI46 -aI47 -aI48 -aI49 -aI50 -aI52 -aI53 -aI54 -aI55 -aI57 -aI59 -aI60 -asS'data' -p124 -F0.6152538910563409 -sS'children' -p125 -(lp126 -(irdkit.ML.DecTree.DecTree -DecTreeNode -p127 -(dp128 -g18 -S'0' -p129 -sg20 -Nsg21 -I1 -sg22 -(lp130 -sg24 -I0 -sg25 -I1 -sg26 -(lp131 -sg120 -(lp132 -sg124 -F0.0 -sg125 -(lp133 -sS'testExamples' -p134 -(lp135 -sba(irdkit.ML.DecTree.DecTree -DecTreeNode -p136 -(dp137 -g18 -S'Var: 1' -p138 -sg20 -Nsg21 -I1 -sg22 -(lp139 -sg24 -I1 -sg25 -I0 -sg26 -(lp140 -sg120 -(lp141 -sg124 -F0.954434002924965 -sg125 -(lp142 -(irdkit.ML.DecTree.DecTree -DecTreeNode -p143 -(dp144 -g18 -S'0' -p145 -sg20 -Nsg21 -I1 -sg22 -(lp146 -sg24 -I0 -sg25 -I1 -sg26 -(lp147 -sg120 -(lp148 -sg124 -F0.0 -sg125 -(lp149 -sg134 -(lp150 -sba(irdkit.ML.DecTree.DecTree -DecTreeNode -p151 -(dp152 -g18 -S'0' -p153 -sg20 -Nsg21 -I1 -sg22 -(lp154 -sg24 -I0 -sg25 -I1 -sg26 -(lp155 -sg120 -(lp156 -sg124 -F0.0 -sg125 -(lp157 -sg134 -(lp158 -sba(irdkit.ML.DecTree.DecTree -DecTreeNode -p159 -(dp160 -g18 -S'1' -p161 -sg20 -Nsg21 -I1 -sg22 -(lp162 -sg24 -I1 -sg25 -I1 -sg26 -(lp163 -sg120 -(lp164 -sg124 -F0.0 -sg125 -(lp165 -sg134 -(lp166 -sbasg134 -(lp167 -sba(irdkit.ML.DecTree.DecTree -DecTreeNode -p168 -(dp169 -g18 -S'1' -p170 -sg20 -Nsg21 -I1 -sg22 -(lp171 -sg24 -I1 -sg25 -I1 -sg26 -(lp172 -sg120 -(lp173 -sg124 -F0.0 -sg125 -(lp174 -sg134 -(lp175 -sbasg134 -(lp176 -(lp177 -S'Ni' -p178 -aI1 -aI1 -aI0 -aI0 -aI2 -aI0 -aI1 -aa(lp179 -S'FePt3' -p180 -aI2 -aI1 -aI0 -aI1 -aI1 -aI1 -aI1 -aa(lp181 -S'AlFe3' -p182 -aI2 -aI1 -aI0 -aI0 -aI1 -aI1 -aI1 -aa(lp183 -S'Co2Nb' -p184 -aI1 -aI1 -aI1 -aI0 -aI0 -aI2 -aI0 -aa(lp185 -S'IrZr' -p186 -aI0 -aI0 -aI1 -aI1 -aI0 -aI2 -aI0 -aa(lp187 -S'IrSc' -p188 -aI0 -aI1 -aI0 -aI1 -aI0 -aI2 -aI0 -aa(lp189 -S'Ir2Y' -p190 -aI0 -aI0 -aI1 -aI1 -aI0 -aI2 -aI0 -aa(lp191 -S'Ir2Zr' -p192 -aI0 -aI0 -aI1 -aI1 -aI0 -aI2 -aI0 -aa(lp193 -S'CoZr' -p194 -aI1 -aI1 -aI1 -aI0 -aI0 -aI2 -aI0 -aa(lp195 -S'CoHf' -p196 -aI1 -aI1 -aI0 -aI1 -aI0 -aI2 -aI0 -aa(lp197 -S'Nb' -p198 -aI1 -aI0 -aI1 -aI0 -aI0 -aI2 -aI0 -aa(lp199 -S'Mo' -p200 -aI1 -aI0 -aI1 -aI0 -aI0 -aI2 -aI0 -aa(lp201 -S'Ir3Ti' -p202 -aI0 -aI1 -aI0 -aI1 -aI0 -aI1 -aI0 -aa(lp203 -S'Rh' -p204 -aI0 -aI0 -aI1 -aI0 -aI1 -aI1 -aI0 -aa(lp205 -S'Pt' -p206 -aI0 -aI0 -aI0 -aI1 -aI1 -aI2 -aI0 -aasba(irdkit.ML.DecTree.DecTree -DecTreeNode -p207 -(dp208 -g17 -I2 -sg18 -S'Var: 5' -p209 -sg20 -Nsg21 -I0 -sg22 -(lp210 -g104 -asg24 -I5 -sg25 -I0 -sg26 -(lp211 -g28 -ag30 -ag32 -ag34 -ag179 -ag36 -ag40 -ag44 -ag50 -ag52 -ag54 -ag56 -ag58 -ag185 -ag60 -ag187 -ag62 -ag64 -ag189 -ag191 -ag66 -ag193 -ag68 -ag70 -ag72 -ag74 -ag76 -ag78 -ag80 -ag86 -ag88 -ag96 -ag98 -ag100 -ag102 -ag201 -ag106 -ag108 -ag203 -ag114 -ag205 -ag118 -asg120 -(lp212 -sg122 -(lp213 -I0 -aI1 -aI2 -aI4 -aI5 -aI6 -aI9 -aI11 -aI14 -aI15 -aI16 -aI17 -aI18 -aI20 -aI21 -aI22 -aI23 -aI24 -aI25 -aI26 -aI27 -aI28 -aI29 -aI30 -aI32 -aI33 -aI34 -aI35 -aI36 -aI39 -aI40 -aI46 -aI47 -aI48 -aI49 -aI51 -aI52 -aI53 -aI56 -aI57 -aI58 -aI60 -asg124 -F0.6500224216483541 -sg125 -(lp214 -(irdkit.ML.DecTree.DecTree -DecTreeNode -p215 -(dp216 -g18 -S'0' -p217 -sg20 -Nsg21 -I1 -sg22 -(lp218 -sg24 -I0 -sg25 -I1 -sg26 -(lp219 -sg120 -(lp220 -sg124 -F0.0 -sg125 -(lp221 -sg134 -(lp222 -sba(irdkit.ML.DecTree.DecTree -DecTreeNode -p223 -(dp224 -g18 -S'Var: 1' -p225 -sg20 -Nsg21 -I1 -sg22 -(lp226 -sg24 -I1 -sg25 -I0 -sg26 -(lp227 -sg120 -(lp228 -sg124 -F0.9910760598382222 -sg125 -(lp229 -(irdkit.ML.DecTree.DecTree -DecTreeNode -p230 -(dp231 -g18 -S'0' -p232 -sg20 -Nsg21 -I1 -sg22 -(lp233 -sg24 -I0 -sg25 -I1 -sg26 -(lp234 -sg120 -(lp235 -sg124 -F0.0 -sg125 -(lp236 -sg134 -(lp237 -sba(irdkit.ML.DecTree.DecTree -DecTreeNode -p238 -(dp239 -g18 -S'1' -p240 -sg20 -g223 -sg21 -I1 -sg22 -(lp241 -sg24 -I1 -sg25 -I1 -sg26 -(lp242 -sg120 -(lp243 -sg124 -F0.0 -sg125 -(lp244 -sg134 -(lp245 -sba(irdkit.ML.DecTree.DecTree -DecTreeNode -p246 -(dp247 -g18 -S'1' -p248 -sg20 -Nsg21 -I1 -sg22 -(lp249 -sg24 -I1 -sg25 -I1 -sg26 -(lp250 -sg120 -(lp251 -sg124 -F0.0 -sg125 -(lp252 -sg134 -(lp253 -sbasg134 -(lp254 -sba(irdkit.ML.DecTree.DecTree -DecTreeNode -p255 -(dp256 -g18 -S'1' -p257 -sg20 -Nsg21 -I1 -sg22 -(lp258 -sg24 -I1 -sg25 -I1 -sg26 -(lp259 -sg120 -(lp260 -sg124 -F0.0 -sg125 -(lp261 -sg134 -(lp262 -sbasg134 -(lp263 -g177 -ag181 -ag38 -ag42 -ag46 -ag48 -ag183 -ag195 -ag82 -ag84 -ag90 -ag92 -ag94 -ag197 -ag199 -ag104 -ag110 -ag112 -ag116 -asba(irdkit.ML.DecTree.DecTree -DecTreeNode -p264 -(dp265 -g17 -I2 -sg18 -S'Var: 5' -p266 -sg20 -Nsg21 -I0 -sg22 -(lp267 -g177 -ag36 -ag40 -asg24 -I5 -sg25 -I0 -sg26 -(lp268 -g28 -ag30 -ag32 -ag34 -ag179 -ag38 -ag42 -ag44 -ag46 -ag48 -ag56 -ag185 -ag60 -ag62 -ag64 -ag66 -ag68 -ag70 -ag195 -ag74 -ag76 -ag78 -ag82 -ag86 -ag92 -ag94 -ag197 -ag199 -ag96 -ag100 -ag102 -ag104 -ag108 -ag110 -ag112 -ag203 -ag205 -ag116 -ag118 -asg120 -(lp269 -sg122 -(lp270 -I0 -aI1 -aI2 -aI4 -aI5 -aI8 -aI10 -aI11 -aI12 -aI13 -aI17 -aI20 -aI21 -aI23 -aI24 -aI27 -aI29 -aI30 -aI31 -aI33 -aI34 -aI35 -aI37 -aI39 -aI42 -aI43 -aI44 -aI45 -aI46 -aI48 -aI49 -aI50 -aI53 -aI54 -aI55 -aI56 -aI58 -aI59 -aI60 -asg124 -F0.6193821946787638 -sg125 -(lp271 -(irdkit.ML.DecTree.DecTree -DecTreeNode -p272 -(dp273 -g18 -S'0' -p274 -sg20 -Nsg21 -I1 -sg22 -(lp275 -sg24 -I0 -sg25 -I1 -sg26 -(lp276 -sg120 -(lp277 -sg124 -F0.0 -sg125 -(lp278 -sg134 -(lp279 -sba(irdkit.ML.DecTree.DecTree -DecTreeNode -p280 -(dp281 -g18 -S'Var: 1' -p282 -sg20 -Nsg21 -I1 -sg22 -(lp283 -sg24 -I1 -sg25 -I0 -sg26 -(lp284 -sg120 -(lp285 -sg124 -F0.9709505944546688 -sg125 -(lp286 -(irdkit.ML.DecTree.DecTree -DecTreeNode -p287 -(dp288 -g18 -S'0' -p289 -sg20 -Nsg21 -I1 -sg22 -(lp290 -sg24 -I0 -sg25 -I1 -sg26 -(lp291 -sg120 -(lp292 -sg124 -F0.0 -sg125 -(lp293 -sg134 -(lp294 -sba(irdkit.ML.DecTree.DecTree -DecTreeNode -p295 -(dp296 -g18 -S'0' -p297 -sg20 -Nsg21 -I1 -sg22 -(lp298 -sg24 -I0 -sg25 -I1 -sg26 -(lp299 -sg120 -(lp300 -sg124 -F0.0 -sg125 -(lp301 -sg134 -(lp302 -sba(irdkit.ML.DecTree.DecTree -DecTreeNode -p303 -(dp304 -g18 -S'1' -p305 -sg20 -Nsg21 -I1 -sg22 -(lp306 -sg24 -I1 -sg25 -I1 -sg26 -(lp307 -sg120 -(lp308 -sg124 -F0.0 -sg125 -(lp309 -sg134 -(lp310 -sbasg134 -(lp311 -sba(irdkit.ML.DecTree.DecTree -DecTreeNode -p312 -(dp313 -g18 -S'Var: 1' -p314 -sg20 -Nsg21 -I1 -sg22 -(lp315 -sg24 -I1 -sg25 -I0 -sg26 -(lp316 -sg120 -(lp317 -sg124 -F0.0 -sg125 -(lp318 -(irdkit.ML.DecTree.DecTree -DecTreeNode -p319 -(dp320 -g18 -S'0' -p321 -sg20 -g312 -sg21 -I1 -sg22 -(lp322 -sg24 -I0 -sg25 -I1 -sg26 -(lp323 -sg120 -(lp324 -sg124 -F0.0 -sg125 -(lp325 -sg134 -(lp326 -sba(irdkit.ML.DecTree.DecTree -DecTreeNode -p327 -(dp328 -g18 -S'0' -p329 -sg20 -g312 -sg21 -I1 -sg22 -(lp330 -sg24 -I0 -sg25 -I1 -sg26 -(lp331 -sg120 -(lp332 -sg124 -F0.0 -sg125 -(lp333 -sg134 -(lp334 -sba(irdkit.ML.DecTree.DecTree -DecTreeNode -p335 -(dp336 -g18 -S'0' -p337 -sg20 -g312 -sg21 -I1 -sg22 -(lp338 -sg24 -I0 -sg25 -I1 -sg26 -(lp339 -sg120 -(lp340 -sg124 -F0.0 -sg125 -(lp341 -sg134 -(lp342 -sbasg134 -(lp343 -sbasg134 -(lp344 -g177 -ag36 -ag181 -ag40 -ag50 -ag52 -ag54 -ag58 -ag183 -ag187 -ag189 -ag191 -ag193 -ag72 -ag80 -ag84 -ag88 -ag90 -ag98 -ag201 -ag106 -ag114 -asba(irdkit.ML.DecTree.DecTree -DecTreeNode -p345 -(dp346 -g17 -I2 -sg18 -S'Var: 5' -p347 -sg20 -Nsg21 -I0 -sg22 -(lp348 -g177 -ag36 -ag40 -ag104 -asg24 -I5 -sg25 -I0 -sg26 -(lp349 -g28 -ag32 -ag34 -ag179 -ag38 -ag42 -ag46 -ag48 -ag52 -ag54 -ag183 -ag185 -ag60 -ag62 -ag64 -ag191 -ag193 -ag68 -ag74 -ag76 -ag80 -ag82 -ag84 -ag92 -ag94 -ag199 -ag96 -ag100 -ag102 -ag106 -ag203 -ag114 -ag116 -ag118 -asg120 -(lp350 -sg122 -(lp351 -I0 -aI2 -aI4 -aI5 -aI8 -aI10 -aI12 -aI13 -aI15 -aI16 -aI19 -aI20 -aI21 -aI23 -aI24 -aI26 -aI28 -aI29 -aI33 -aI34 -aI36 -aI37 -aI38 -aI42 -aI43 -aI45 -aI46 -aI48 -aI49 -aI52 -aI56 -aI57 -aI59 -aI60 -asg124 -F0.6024308020404453 -sg125 -(lp352 -(irdkit.ML.DecTree.DecTree -DecTreeNode -p353 -(dp354 -g18 -S'0' -p355 -sg20 -Nsg21 -I1 -sg22 -(lp356 -sg24 -I0 -sg25 -I1 -sg26 -(lp357 -sg120 -(lp358 -sg124 -F0.0 -sg125 -(lp359 -sg134 -(lp360 -sba(irdkit.ML.DecTree.DecTree -DecTreeNode -p361 -(dp362 -g18 -S'Var: 1' -p363 -sg20 -Nsg21 -I1 -sg22 -(lp364 -sg24 -I1 -sg25 -I0 -sg26 -(lp365 -sg120 -(lp366 -sg124 -F0.8631205685666311 -sg125 -(lp367 -(irdkit.ML.DecTree.DecTree -DecTreeNode -p368 -(dp369 -g18 -S'0' -p370 -sg20 -Nsg21 -I1 -sg22 -(lp371 -sg24 -I0 -sg25 -I1 -sg26 -(lp372 -sg120 -(lp373 -sg124 -F0.0 -sg125 -(lp374 -sg134 -(lp375 -sba(irdkit.ML.DecTree.DecTree -DecTreeNode -p376 -(dp377 -g18 -S'1' -p378 -sg20 -g361 -sg21 -I1 -sg22 -(lp379 -sg24 -I1 -sg25 -I1 -sg26 -(lp380 -sg120 -(lp381 -sg124 -F0.0 -sg125 -(lp382 -sg134 -(lp383 -sba(irdkit.ML.DecTree.DecTree -DecTreeNode -p384 -(dp385 -g18 -S'1' -p386 -sg20 -Nsg21 -I1 -sg22 -(lp387 -sg24 -I1 -sg25 -I1 -sg26 -(lp388 -sg120 -(lp389 -sg124 -F0.0 -sg125 -(lp390 -sg134 -(lp391 -sbasg134 -(lp392 -sba(irdkit.ML.DecTree.DecTree -DecTreeNode -p393 -(dp394 -g18 -S'Var: 1' -p395 -sg20 -Nsg21 -I1 -sg22 -(lp396 -sg24 -I1 -sg25 -I0 -sg26 -(lp397 -sg120 -(lp398 -sg124 -F0.0 -sg125 -(lp399 -(irdkit.ML.DecTree.DecTree -DecTreeNode -p400 -(dp401 -g18 -S'0' -p402 -sg20 -g393 -sg21 -I1 -sg22 -(lp403 -sg24 -I0 -sg25 -I1 -sg26 -(lp404 -sg120 -(lp405 -sg124 -F0.0 -sg125 -(lp406 -sg134 -(lp407 -sba(irdkit.ML.DecTree.DecTree -DecTreeNode -p408 -(dp409 -g18 -S'0' -p410 -sg20 -g393 -sg21 -I1 -sg22 -(lp411 -sg24 -I0 -sg25 -I1 -sg26 -(lp412 -sg120 -(lp413 -sg124 -F0.0 -sg125 -(lp414 -sg134 -(lp415 -sba(irdkit.ML.DecTree.DecTree -DecTreeNode -p416 -(dp417 -g18 -S'0' -p418 -sg20 -g393 -sg21 -I1 -sg22 -(lp419 -sg24 -I0 -sg25 -I1 -sg26 -(lp420 -sg120 -(lp421 -sg124 -F0.0 -sg125 -(lp422 -sg134 -(lp423 -sbasg134 -(lp424 -sbasg134 -(lp425 -g30 -ag177 -ag36 -ag181 -ag40 -ag44 -ag50 -ag56 -ag58 -ag187 -ag189 -ag66 -ag70 -ag195 -ag72 -ag78 -ag86 -ag88 -ag90 -ag197 -ag98 -ag104 -ag201 -ag108 -ag110 -ag112 -ag205 -asba(irdkit.ML.DecTree.DecTree -DecTreeNode -p426 -(dp427 -g17 -I2 -sg18 -S'Var: 5' -p428 -sg20 -Nsg21 -I0 -sg22 -(lp429 -g66 -ag203 -ag114 -ag205 -asg24 -I5 -sg25 -I0 -sg26 -(lp430 -g32 -ag177 -ag34 -ag36 -ag181 -ag40 -ag46 -ag50 -ag52 -ag56 -ag185 -ag60 -ag187 -ag62 -ag64 -ag189 -ag191 -ag193 -ag68 -ag70 -ag195 -ag72 -ag74 -ag76 -ag78 -ag90 -ag92 -ag94 -ag96 -ag100 -ag104 -ag201 -ag106 -ag108 -ag112 -ag118 -asg120 -(lp431 -sg122 -(lp432 -I2 -aI3 -aI4 -aI6 -aI7 -aI9 -aI12 -aI14 -aI15 -aI17 -aI20 -aI21 -aI22 -aI23 -aI24 -aI25 -aI26 -aI28 -aI29 -aI30 -aI31 -aI32 -aI33 -aI34 -aI35 -aI41 -aI42 -aI43 -aI46 -aI48 -aI50 -aI51 -aI52 -aI53 -aI55 -aI60 -asg124 -F0.6500224216483541 -sg125 -(lp433 -(irdkit.ML.DecTree.DecTree -DecTreeNode -p434 -(dp435 -g18 -S'0' -p436 -sg20 -Nsg21 -I1 -sg22 -(lp437 -sg24 -I0 -sg25 -I1 -sg26 -(lp438 -sg120 -(lp439 -sg124 -F0.0 -sg125 -(lp440 -sg134 -(lp441 -sba(irdkit.ML.DecTree.DecTree -DecTreeNode -p442 -(dp443 -g18 -S'Var: 1' -p444 -sg20 -Nsg21 -I1 -sg22 -(lp445 -sg24 -I1 -sg25 -I0 -sg26 -(lp446 -sg120 -(lp447 -sg124 -F0.8112781244591328 -sg125 -(lp448 -(irdkit.ML.DecTree.DecTree -DecTreeNode -p449 -(dp450 -g18 -S'1' -p451 -sg20 -g442 -sg21 -I1 -sg22 -(lp452 -sg24 -I1 -sg25 -I1 -sg26 -(lp453 -sg120 -(lp454 -sg124 -F0.0 -sg125 -(lp455 -sg134 -(lp456 -sba(irdkit.ML.DecTree.DecTree -DecTreeNode -p457 -(dp458 -g18 -S'0' -p459 -sg20 -Nsg21 -I1 -sg22 -(lp460 -sg24 -I0 -sg25 -I1 -sg26 -(lp461 -sg120 -(lp462 -sg124 -F0.0 -sg125 -(lp463 -sg134 -(lp464 -sba(irdkit.ML.DecTree.DecTree -DecTreeNode -p465 -(dp466 -g18 -S'1' -p467 -sg20 -Nsg21 -I1 -sg22 -(lp468 -sg24 -I1 -sg25 -I1 -sg26 -(lp469 -sg120 -(lp470 -sg124 -F0.0 -sg125 -(lp471 -sg134 -(lp472 -sbasg134 -(lp473 -sba(irdkit.ML.DecTree.DecTree -DecTreeNode -p474 -(dp475 -g18 -S'1' -p476 -sg20 -Nsg21 -I1 -sg22 -(lp477 -sg24 -I1 -sg25 -I1 -sg26 -(lp478 -sg120 -(lp479 -sg124 -F0.0 -sg125 -(lp480 -sg134 -(lp481 -sbasg134 -(lp482 -g28 -ag30 -ag179 -ag38 -ag42 -ag44 -ag48 -ag54 -ag58 -ag183 -ag66 -ag80 -ag82 -ag84 -ag86 -ag88 -ag197 -ag199 -ag98 -ag102 -ag110 -ag203 -ag114 -ag205 -ag116 -asbasS'nPossibleVals' -p483 -(lp484 -I0 -aI3 -aI2 -aI2 -aI2 -aI3 -aI3 -aI2 -asS'_varNames' -p485 -(lp486 -S'composition' -p487 -aS'max_atomic' -p488 -aS'has3d' -p489 -aS'has4d' -p490 -aS'has5d' -p491 -aS'elconc' -p492 -aS'atvol' -p493 -aS'isferro' -p494 -asS'quantBounds' -p495 -(lp496 -(lp497 -a(lp498 -F1.89 -aF3.53 -aa(lp499 -a(lp500 -a(lp501 -a(lp502 -F0.55 -aF0.73 -aa(lp503 -F11.81 -aF14.52 -aa(lp504 -asS'quantizationRequirements' -p505 -(lp506 -I1 -aI1 -aI1 -aI1 -aI1 -asS'countList' -p507 -(lp508 -I75 -aI21 -aI1 -aI2 -aI1 -asS'errList' -p509 -(lp510 -F0.0 -aF0.01639344262295082 -aF0.04918032786885246 -aF0.06557377049180328 -aF0.06557377049180328 -asb. \ No newline at end of file diff --git a/rdkit/ML/Composite/test_data/composite_base.unitnet.pkl b/rdkit/ML/Composite/test_data/composite_base.unitnet.pkl deleted file mode 100644 index 0d278c11b6a..00000000000 Binary files a/rdkit/ML/Composite/test_data/composite_base.unitnet.pkl and /dev/null differ diff --git a/rdkit/ML/Composite/test_data/composite_base.unittree.pkl b/rdkit/ML/Composite/test_data/composite_base.unittree.pkl deleted file mode 100644 index 8b47be571eb..00000000000 Binary files a/rdkit/ML/Composite/test_data/composite_base.unittree.pkl and /dev/null differ diff --git a/rdkit/ML/Composite/test_data/composite_plus_neural.pkl b/rdkit/ML/Composite/test_data/composite_plus_neural.pkl deleted file mode 100644 index b1daebd49be..00000000000 Binary files a/rdkit/ML/Composite/test_data/composite_plus_neural.pkl and /dev/null differ diff --git a/rdkit/ML/Composite/test_data/ferro.others.pkl b/rdkit/ML/Composite/test_data/ferro.others.pkl deleted file mode 100644 index b98f51419ea..00000000000 --- a/rdkit/ML/Composite/test_data/ferro.others.pkl +++ /dev/null @@ -1,551 +0,0 @@ -(lp1 -(lp2 -S'CrPt3' -aF0.61966934200000001 -aF14.523874899999999 -aI1 -aI0 -aI1 -aF0.22889799999999999 -aI1 -aa(lp3 -S'Fe3Pd' -aF0.61090143399999997 -aF13.91386486 -aI1 -aI1 -aI0 -aF0.185644 -aI1 -aa(lp4 -S'Fe3Pt' -aF0.65516761400000001 -aF12.97377925 -aI1 -aI0 -aI1 -aF0.24101400000000001 -aI1 -aa(lp5 -S'FeNi3' -aF0.84758124199999996 -aF11.20836508 -aI1 -aI0 -aI0 -aF0.180974 -aI1 -aa(lp6 -S'FePt3' -aF0.65460233599999995 -aF14.51262771 -aI1 -aI0 -aI1 -aF0.22326599999999999 -aI1 -aa(lp7 -S'HfIr3' -aF0.51228401899999998 -aF15.12832669 -aI0 -aI0 -aI1 -aF0.67120000000000002 -aI0 -aa(lp8 -S'HfRh3' -aF0.51820099900000005 -aF14.955586759999999 -aI0 -aI1 -aI1 -aF0.64440799999999998 -aI0 -aa(lp9 -S'Ir3Nb' -aF0.54237138299999998 -aF14.750040739999999 -aI0 -aI1 -aI1 -aF1.5935779999999999 -aI0 -aa(lp10 -S'Ir3Ta' -aF0.54530764899999995 -aF14.670617610000001 -aI0 -aI0 -aI1 -aF1.768562 -aI0 -aa(lp11 -S'Ir3Ti' -aF0.54534666499999995 -aF14.21114403 -aI1 -aI0 -aI1 -aF0.71016000000000001 -aI0 -aa(lp12 -S'Ir3V' -aF0.57768528100000005 -aF13.84837083 -aI1 -aI0 -aI1 -aF0.94931399999999999 -aI0 -aa(lp13 -S'AlFe3' -aF0.55352822999999995 -aF12.1945 -aI1 -aI0 -aI0 -aF0.15761800000000001 -aI1 -aa(lp14 -S'Cr' -aF0.50234696499999998 -aF11.943936000000001 -aI1 -aI0 -aI0 -aF0.71659499999999998 -aI0 -aa(lp15 -S'Fe' -aF0.67682172799999996 -aF11.8199515 -aI1 -aI0 -aI0 -aF0.13253599999999999 -aI1 -aa(lp16 -S'Mn' -aF0.59221901200000004 -aF11.8199515 -aI1 -aI0 -aI0 -aF0.165265 -aI1 -aa(lp17 -S'Mo' -aF0.38392782199999997 -aF15.6279375 -aI0 -aI1 -aI0 -aF0.85421999999999998 -aI0 -aa(lp18 -S'Nb' -aF0.27826474099999998 -aF17.968499999999999 -aI0 -aI1 -aI0 -aF0.37559700000000001 -aI0 -aa(lp19 -S'Ta' -aF0.275750316 -aF18.1323455 -aI0 -aI0 -aI1 -aF0.42770000000000002 -aI0 -aa(lp20 -S'V' -aF0.36306064199999999 -aF13.771803999999999 -aI0 -aI0 -aI0 -aF0.28043600000000002 -aI0 -aa(lp21 -S'W' -aF0.38029445899999997 -aF15.777248 -aI0 -aI0 -aI1 -aF1.0827500000000001 -aI0 -aa(lp22 -S'Cr3Ir' -aF0.26256387799999997 -aF12.85401489 -aI1 -aI0 -aI1 -aF0.27913199999999999 -aI0 -aa(lp23 -S'Cr3Os' -aF0.26273208100000001 -aF12.84578569 -aI1 -aI0 -aI1 -aF0.32028800000000002 -aI0 -aa(lp24 -S'CoV3' -aF0.46978202099999999 -aF12.77188086 -aI1 -aI0 -aI0 -aF0.62309199999999998 -aI0 -aa(lp25 -S'Cr3Pt' -aF0.53731907499999998 -aF13.027640979999999 -aI1 -aI0 -aI1 -aF0.38081999999999999 -aI0 -aa(lp26 -S'Cr3Rh' -aF0.528844066 -aF12.76368675 -aI1 -aI1 -aI0 -aF0.24842 -aI0 -aa(lp27 -S'Cr3Ru' -aF0.507626409 -aF12.80469235 -aI1 -aI1 -aI0 -aF0.30465199999999998 -aI0 -aa(lp28 -S'IrNb3' -aF0.35471017300000002 -aF16.91521826 -aI0 -aI1 -aI1 -aF0.57980399999999999 -aI0 -aa(lp29 -S'IrTi3' -aF0.33419211199999999 -aF15.709526970000001 -aI1 -aI0 -aI1 -aF0.423736 -aI0 -aa(lp30 -S'IrV3' -aF0.43729932999999999 -aF13.720578980000001 -aI1 -aI0 -aI1 -aF0.58485600000000004 -aI0 -aa(lp31 -S'CoHf' -aF0.41042511399999998 -aF15.83723747 -aI1 -aI0 -aI1 -aF0.50134800000000002 -aI0 -aa(lp32 -S'CoSc' -aF0.38576186899999998 -aF15.55363681 -aI1 -aI0 -aI0 -aF1.05901 -aI0 -aa(lp33 -S'CoTi' -aF0.483896936 -aF13.43261244 -aI1 -aI0 -aI0 -aF0.35558600000000001 -aI0 -aa(lp34 -S'CoZr' -aF0.398968508 -aF16.29201269 -aI1 -aI1 -aI0 -aF0.46965699999999999 -aI0 -aa(lp35 -S'FeCo' -aF0.73405988799999999 -aF11.579436680000001 -aI1 -aI0 -aI0 -aF0.111301 -aI1 -aa(lp36 -S'HfOs' -aF0.35314121100000001 -aF16.990370460000001 -aI0 -aI0 -aI1 -aF3.27698 -aI0 -aa(lp37 -S'HfPt' -aF0.41545163099999999 -aF16.849133500000001 -aI0 -aI0 -aI1 -aF0.69706299999999999 -aI0 -aa(lp38 -S'HfRu' -aF0.35776025299999997 -aF16.77100781 -aI0 -aI1 -aI1 -aF2.2109000000000001 -aI0 -aa(lp39 -S'HfTc' -aF0.31459326999999998 -aF17.482891500000001 -aI0 -aI1 -aI1 -aF0.69430199999999997 -aI0 -aa(lp40 -S'IrSc' -aF0.364158702 -aF16.47633291 -aI1 -aI0 -aI1 -aF2.2385199999999998 -aI0 -aa(lp41 -S'IrY' -aF0.30531243600000002 -aF19.652000000000001 -aI0 -aI1 -aI1 -aF1.2551600000000001 -aI0 -aa(lp42 -S'IrZr' -aF0.35588870700000003 -aF18.26413672 -aI0 -aI1 -aI1 -aF0.61846900000000005 -aI0 -aa(lp43 -S'Co2Nb' -aF0.29597310599999999 -aF25.903254400000002 -aI1 -aI1 -aI0 -aF0.30956099999999998 -aI0 -aa(lp44 -S'Co2Y' -aF0.223929924 -aF31.259779340000001 -aI1 -aI1 -aI0 -aF0.48575099999999999 -aI0 -aa(lp45 -S'CoTi2' -aF0.223081584 -aF25.40176808 -aI1 -aI0 -aI0 -aF0.32026199999999999 -aI0 -aa(lp46 -S'Cr2Hf' -aF0.175826908 -aF30.332861999999999 -aI1 -aI0 -aI1 -aF0.92350200000000005 -aI0 -aa(lp47 -S'Cr2Nb' -aF0.20256037800000001 -aF27.975197919999999 -aI1 -aI1 -aI0 -aF0.39364500000000002 -aI0 -aa(lp48 -S'Cr2Ta' -aF0.20160161700000001 -aF28.108240139999999 -aI1 -aI0 -aI1 -aF0.490095 -aI0 -aa(lp49 -S'Cr2Ti' -aF0.19031626900000001 -aF28.023528219999999 -aI1 -aI0 -aI0 -aF1.0284420000000001 -aI0 -aa(lp50 -S'Cr2Zr' -aF0.17040053999999999 -aF31.29880528 -aI1 -aI0 -aI0 -aF0.86694599999999999 -aI0 -aa(lp51 -S'HfMo2' -aF0.14900548 -aF35.792866969999999 -aI0 -aI1 -aI1 -aF1.008786 -aI0 -aa(lp52 -S'HfNi2' -aF0.29146842299999998 -aF27.447227120000001 -aI1 -aI0 -aI1 -aF0.80195099999999997 -aI0 -aa(lp53 -S'HfV2' -aF0.13947478299999999 -aF33.458855980000003 -aI1 -aI0 -aI1 -aF0.19059000000000001 -aI0 -aa(lp54 -S'HfW2' -aF0.14637086699999999 -aF36.437123249999999 -aI0 -aI0 -aI1 -aF1.222842 -aI0 -aa(lp55 -S'Ir2Sc' -aF0.15129380000000001 -aF33.048280910000003 -aI1 -aI0 -aI1 -aF0.500919 -aI0 -aa(lp56 -S'Ir2Y' -aF0.14075330699999999 -aF35.523143959999999 -aI0 -aI1 -aI1 -aF0.88424999999999998 -aI0 -aa(lp57 -S'Ir2Zr' -aF0.160591874 -aF33.210480769999997 -aI0 -aI1 -aI1 -aF0.52835699999999997 -aI0 -aa(lp58 -S'Ir' -aF0.635782878 -aF14.155775999999999 -aI0 -aI0 -aI1 -aF0.56740999999999997 -aI0 -aa(lp59 -S'Ni' -aF0.91713232499999997 -aF10.903551999999999 -aI1 -aI0 -aI0 -aF0.114277 -aI1 -aa(lp60 -S'Pd' -aF0.67953399400000003 -aF14.71596725 -aI0 -aI1 -aI0 -aF0.200794 -aI0 -aa(lp61 -S'Pt' -aF0.66405154300000002 -aF15.059072 -aI0 -aI0 -aI1 -aF0.25342599999999998 -aI0 -aa(lp62 -S'Rh' -aF0.65607231399999999 -aF13.718 -aI0 -aI1 -aI0 -aF0.40198299999999998 -aI0 -aa. diff --git a/rdkit/ML/Composite/test_data/ferro.others.txt b/rdkit/ML/Composite/test_data/ferro.others.txt deleted file mode 100644 index 927e8ac3021..00000000000 --- a/rdkit/ML/Composite/test_data/ferro.others.txt +++ /dev/null @@ -1,61 +0,0 @@ -CrPt3 0.619669342 14.5238749 1 0 1 0.228898 2.219 1 -Fe3Pd 0.610901434 13.91386486 1 1 0 0.185644 2.86835 1 -Fe3Pt 0.655167614 12.97377925 1 0 1 0.241014 2.10665 1 -FeNi3 0.847581242 11.20836508 1 0 0 0.180974 2.68045 1 -FePt3 0.654602336 14.51262771 1 0 1 0.223266 2.225775 1 -HfIr3 0.512284019 15.12832669 0 0 1 0.6712 0.7856 0 -HfRh3 0.518200999 14.95558676 0 1 1 0.644408 0.762975 0 -Ir3Nb 0.542371383 14.75004074 0 1 1 1.593578 0.312225 0 -Ir3Ta 0.545307649 14.67061761 0 0 1 1.768562 0.28265 0 -Ir3Ti 0.545346665 14.21114403 1 0 1 0.71016 0.76825 0 -Ir3V 0.577685281 13.84837083 1 0 1 0.949314 0.510375 0 -AlFe3 0.55352823 12.1945 1 0 0 0.157618 4.203725 1 -Cr 0.502346965 11.943936 1 0 0 0.716595 0.6845 0 -Fe 0.676821728 11.8199515 1 0 0 0.132536 3.5619 1 -Mn 0.592219012 11.8199515 1 0 0 0.165265 3.0354 1 -Mo 0.383927822 15.6279375 0 1 0 0.85422 0.5807 0 -Nb 0.278264741 17.9685 0 1 0 0.375597 1.3246 0 -Ta 0.275750316 18.1323455 0 0 1 0.4277 1.1663 0 -V 0.363060642 13.771804 0 0 0 0.280436 1.7741 0 -W 0.380294459 15.777248 0 0 1 1.08275 0.4581 0 -Cr3Ir 0.262563878 12.85401489 1 0 1 0.279132 2.0147125 0 -Cr3Os 0.262732081 12.84578569 1 0 1 0.320288 1.6556875 0 -CoV3 0.469782021 12.77188086 1 0 0 0.623092 0.8652 0 -Cr3Pt 0.537319075 13.02764098 1 0 1 0.38082 1.1689125 0 -Cr3Rh 0.528844066 12.76368675 1 1 0 0.24842 2.2005375 0 -Cr3Ru 0.507626409 12.80469235 1 1 0 0.304652 1.8208875 0 -IrNb3 0.354710173 16.91521826 0 1 1 0.579804 0.9278625 0 -IrTi3 0.334192112 15.70952697 1 0 1 0.423736 1.2389125 0 -IrV3 0.43729933 13.72057898 1 0 1 0.584856 0.8927125 0 -CoHf 0.410425114 15.83723747 1 0 1 0.501348 0.9952 0 -CoSc 0.385761869 15.55363681 1 0 0 1.05901 0.46085 0 -CoTi 0.483896936 13.43261244 1 0 0 0.355586 1.40905 0 -CoZr 0.398968508 16.29201269 1 1 0 0.469657 1.05835 0 -FeCo 0.734059888 11.57943668 1 0 0 0.111301 5.31515 1 -HfOs 0.353141211 16.99037046 0 0 1 3.27698 0.15245 0 -HfPt 0.415451631 16.8491335 0 0 1 0.697063 0.71185 0 -HfRu 0.357760253 16.77100781 0 1 1 2.2109 0.20815 0 -HfTc 0.31459327 17.4828915 0 1 1 0.694302 0.69565 0 -IrSc 0.364158702 16.47633291 1 0 1 2.23852 0.2158 0 -IrY 0.305312436 19.652 0 1 1 1.25516 0.35485 0 -IrZr 0.355888707 18.26413672 0 1 1 0.618469 0.80285 0 -Co2Nb 0.295973106 25.9032544 1 1 0 0.309561 1.74305 0 -Co2Y 0.223929924 31.25977934 1 1 0 0.485751 0.727433333 0 -CoTi2 0.223081584 25.40176808 1 0 0 0.320262 1.673383333 0 -Cr2Hf 0.175826908 30.332862 1 0 1 0.923502 0.540683333 0 -Cr2Nb 0.202560378 27.97519792 1 1 0 0.393645 1.364133333 0 -Cr2Ta 0.201601617 28.10824014 1 0 1 0.490095 1.231216667 0 -Cr2Ti 0.190316269 28.02352822 1 0 0 1.028442 0.476883333 0 -Cr2Zr 0.17040054 31.29880528 1 0 0 0.866946 0.576983333 0 -HfMo2 0.14900548 35.79286697 0 1 1 1.008786 0.495866667 0 -HfNi2 0.291468423 27.44722712 1 0 1 0.801951 0.5928 0 -HfV2 0.139474783 33.45885598 1 0 1 0.19059 2.465583333 0 -HfW2 0.146370867 36.43712325 0 0 1 1.222842 0.4088 0 -Ir2Sc 0.1512938 33.04828091 1 0 1 0.500919 0.6356 0 -Ir2Y 0.140753307 35.52314396 0 1 1 0.88425 0.617 0 -Ir2Zr 0.160591874 33.21048077 0 1 1 0.528357 0.9719 0 -Ir 0.635782878 14.155776 0 0 1 0.56741 0.8779 0 -Ni 0.917132325 10.903552 1 0 0 0.114277 4.2404 1 -Pd 0.679533994 14.71596725 0 1 0 0.200794 2.5226 0 -Pt 0.664051543 15.059072 0 0 1 0.253426 1.997 0 -Rh 0.656072314 13.718 0 1 0 0.401983 1.2426 0 diff --git a/rdkit/ML/Composite/test_data/ferro.pkl b/rdkit/ML/Composite/test_data/ferro.pkl deleted file mode 100644 index 6f2d0dd6d93..00000000000 --- a/rdkit/ML/Composite/test_data/ferro.pkl +++ /dev/null @@ -1,551 +0,0 @@ -(lp1 -(lp2 -S'Fe3Pd' -aF3.5345430374100002 -aI1 -aI1 -aI0 -aF0.61090143443599998 -aF13.9138648575 -aI1 -aa(lp3 -S'Fe3Pt' -aF3.5345430374100002 -aI1 -aI0 -aI1 -aF0.65516761432500004 -aF12.97377925 -aI1 -aa(lp4 -S'CrPt3' -aF3.6748180389399998 -aI1 -aI0 -aI1 -aF0.619669341609 -aF14.523874905 -aI1 -aa(lp5 -S'Ni' -aF1.89935600758 -aI1 -aI0 -aI0 -aF0.91713232531900002 -aF10.903551999999999 -aI1 -aa(lp6 -S'Mn' -aF4.2085700035100002 -aI1 -aI0 -aI0 -aF0.59221901206600003 -aF11.8199515 -aI1 -aa(lp7 -S'FePt3' -aF3.5345430374100002 -aI1 -aI0 -aI1 -aF0.65460233585500005 -aF14.5126277125 -aI1 -aa(lp8 -S'FeCo' -aF3.5345430374100002 -aI1 -aI0 -aI0 -aF0.73405988833699998 -aF11.579436684999999 -aI1 -aa(lp9 -S'AlFe3' -aF3.5345430374100002 -aI1 -aI0 -aI0 -aF0.55352822993999995 -aF12.1945 -aI1 -aa(lp10 -S'Fe' -aF3.5345430374100002 -aI1 -aI0 -aI0 -aF0.67682172807600005 -aF11.8199515 -aI1 -aa(lp11 -S'FeNi3' -aF3.5345430374100002 -aI1 -aI0 -aI0 -aF0.84758124223099995 -aF11.2083650825 -aI1 -aa(lp12 -S'HfV2' -aF2.2877440452600002 -aI1 -aI0 -aI1 -aF0.13947478270499999 -aF33.458855974999999 -aI0 -aa(lp13 -S'HfNi2' -aF1.89935600758 -aI1 -aI0 -aI1 -aF0.29146842287500002 -aF27.447227116699999 -aI0 -aa(lp14 -S'HfMo2' -aF2.9241099357599998 -aI0 -aI1 -aI1 -aF0.14900548028999999 -aF35.792866966699997 -aI0 -aa(lp15 -S'Cr2Zr' -aF3.6748180389399998 -aI1 -aI1 -aI0 -aF0.17040054037399999 -aF31.298805283299998 -aI0 -aa(lp16 -S'Cr2Ti' -aF3.6748180389399998 -aI1 -aI0 -aI0 -aF0.190316268905 -aF28.023528225 -aI0 -aa(lp17 -S'Cr2Ta' -aF3.6748180389399998 -aI1 -aI0 -aI1 -aF0.20160161710999999 -aF28.108240141700001 -aI0 -aa(lp18 -S'Cr2Nb' -aF3.6748180389399998 -aI1 -aI1 -aI0 -aF0.20256037807300001 -aF27.975197916700001 -aI0 -aa(lp19 -S'HfW2' -aF2.1384620666499998 -aI0 -aI0 -aI1 -aF0.14637086733599999 -aF36.437123249999999 -aI0 -aa(lp20 -S'Co2Y' -aF2.7561609745000002 -aI1 -aI1 -aI0 -aF0.22392992360899999 -aF31.2597793417 -aI0 -aa(lp21 -S'Co2Nb' -aF2.7561609745000002 -aI1 -aI1 -aI0 -aF0.29597310624700002 -aF25.903254400000002 -aI0 -aa(lp22 -S'IrZr' -aF1.6517590284300001 -aI0 -aI1 -aI1 -aF0.35588870700199998 -aF18.264136714999999 -aI0 -aa(lp23 -S'IrY' -aF1.6517590284300001 -aI0 -aI1 -aI1 -aF0.30531243639299999 -aF19.652000000000001 -aI0 -aa(lp24 -S'IrSc' -aF1.6517590284300001 -aI1 -aI0 -aI1 -aF0.36415870162199998 -aF16.47633291 -aI0 -aa(lp25 -S'HfTc' -aF2.9957029819500001 -aI0 -aI1 -aI1 -aF0.31459326965500001 -aF17.482891500000001 -aI0 -aa(lp26 -S'Ir2Sc' -aF1.6517590284300001 -aI1 -aI0 -aI1 -aF0.15129379993700001 -aF33.048280908300001 -aI0 -aa(lp27 -S'Ir2Y' -aF1.6517590284300001 -aI0 -aI1 -aI1 -aF0.14075330732800001 -aF35.523143966699998 -aI0 -aa(lp28 -S'Ir2Zr' -aF1.6517590284300001 -aI0 -aI1 -aI1 -aF0.160591873676 -aF33.210480775000001 -aI0 -aa(lp29 -S'Ir' -aF1.6517590284300001 -aI0 -aI0 -aI1 -aF0.63578287760399999 -aF14.155775999999999 -aI0 -aa(lp30 -S'CoZr' -aF2.7561609745000002 -aI1 -aI1 -aI0 -aF0.39896850829199998 -aF16.29201269 -aI0 -aa(lp31 -S'CoTi2' -aF2.7561609745000002 -aI1 -aI0 -aI0 -aF0.22308158424499999 -aF25.401768083299999 -aI0 -aa(lp32 -S'CoSc' -aF2.7561609745000002 -aI1 -aI0 -aI0 -aF0.38576186851799998 -aF15.553636815000001 -aI0 -aa(lp33 -S'CoHf' -aF2.7561609745000002 -aI1 -aI0 -aI1 -aF0.41042511437399998 -aF15.83723747 -aI0 -aa(lp34 -S'IrV3' -aF2.2877440452600002 -aI1 -aI0 -aI1 -aF0.43729933011299998 -aF13.7205789875 -aI0 -aa(lp35 -S'IrTi3' -aF1.6517590284300001 -aI1 -aI0 -aI1 -aF0.33419211237399998 -aF15.7095269625 -aI0 -aa(lp36 -S'IrNb3' -aF2.3222479820299999 -aI0 -aI1 -aI1 -aF0.35471017322300002 -aF16.915218262500002 -aI0 -aa(lp37 -S'Cr3Ru' -aF3.6748180389399998 -aI1 -aI1 -aI0 -aF0.50762640931400005 -aF12.80469235 -aI0 -aa(lp38 -S'Cr3Rh' -aF3.6748180389399998 -aI1 -aI1 -aI0 -aF0.52884406615500001 -aF12.76368675 -aI0 -aa(lp39 -S'Cr3Pt' -aF3.6748180389399998 -aI1 -aI0 -aI1 -aF0.53731907514400001 -aF13.027640975000001 -aI0 -aa(lp40 -S'CoV3' -aF2.7561609745000002 -aI1 -aI0 -aI0 -aF0.46978202072199998 -aF12.7718808625 -aI0 -aa(lp41 -S'Cr3Os' -aF3.6748180389399998 -aI1 -aI0 -aI1 -aF0.26273208055199998 -aF12.845785687499999 -aI0 -aa(lp42 -S'Cr3Ir' -aF3.6748180389399998 -aI1 -aI0 -aI1 -aF0.26256387825400002 -aF12.8540148875 -aI0 -aa(lp43 -S'W' -aF2.1384620666499998 -aI0 -aI0 -aI1 -aF0.38029445946500001 -aF15.777248 -aI0 -aa(lp44 -S'V' -aF2.2877440452600002 -aI1 -aI0 -aI0 -aF0.363060641874 -aF13.771803999999999 -aI0 -aa(lp45 -S'Ta' -aF1.5656620264100001 -aI0 -aI0 -aI1 -aF0.27575031591999999 -aF18.1323455 -aI0 -aa(lp46 -S'Nb' -aF2.3222479820299999 -aI0 -aI1 -aI0 -aF0.27826474107499999 -aF17.968499999999999 -aI0 -aa(lp47 -S'Mo' -aF2.9241099357599998 -aI0 -aI1 -aI0 -aF0.38392782156999999 -aF15.6279375 -aI0 -aa(lp48 -S'HfRu' -aF1.89930105209 -aI0 -aI1 -aI1 -aF0.35776025306199999 -aF16.771007815000001 -aI0 -aa(lp49 -S'HfPt' -aF1.0095700025600001 -aI0 -aI0 -aI1 -aF0.41545163138500002 -aF16.849133500000001 -aI0 -aa(lp50 -S'Cr' -aF3.6748180389399998 -aI1 -aI0 -aI0 -aF0.50234696502099996 -aF11.943936000000001 -aI0 -aa(lp51 -S'HfOs' -aF2.1916060447699999 -aI0 -aI0 -aI1 -aF0.35314121102500001 -aF16.990370460000001 -aI0 -aa(lp52 -S'Ir3V' -aF2.2877440452600002 -aI1 -aI0 -aI1 -aF0.57768528130600005 -aF13.848370832500001 -aI0 -aa(lp53 -S'Ir3Ti' -aF1.6517590284300001 -aI1 -aI0 -aI1 -aF0.54534666472100002 -aF14.2111440325 -aI0 -aa(lp54 -S'Ir3Ta' -aF1.6517590284300001 -aI0 -aI0 -aI1 -aF0.54530764893100003 -aF14.670617614999999 -aI0 -aa(lp55 -S'Ir3Nb' -aF2.3222479820299999 -aI0 -aI1 -aI1 -aF0.54237138330800005 -aF14.750040739999999 -aI0 -aa(lp56 -S'HfRh3' -aF1.3449230194099999 -aI0 -aI1 -aI1 -aF0.51820099910899997 -aF14.955586757500001 -aI0 -aa(lp57 -S'HfIr3' -aF1.6517590284300001 -aI0 -aI0 -aI1 -aF0.51228401899599996 -aF15.128326695 -aI0 -aa(lp58 -S'Rh' -aF1.3449230194099999 -aI0 -aI1 -aI0 -aF0.65607231374800001 -aF13.718 -aI0 -aa(lp59 -S'Pd' -aF0 -aI0 -aI1 -aI0 -aF0.67953399393400005 -aF14.71596725 -aI0 -aa(lp60 -S'Pt' -aF0.70885699987399997 -aI0 -aI0 -aI1 -aF0.66405154315000003 -aF15.059072 -aI0 -aa(lp61 -S'CoTi' -aF2.7561609745000002 -aI1 -aI0 -aI0 -aF0.483896935837 -aF13.43261244 -aI0 -aa(lp62 -S'Cr2Hf' -aF3.6748180389399998 -aI1 -aI0 -aI1 -aF0.17582690790399999 -aF30.332861999999999 -aI0 -aa. diff --git a/rdkit/ML/Composite/test_data/ferro.txt b/rdkit/ML/Composite/test_data/ferro.txt deleted file mode 100644 index 19c3dba46ab..00000000000 --- a/rdkit/ML/Composite/test_data/ferro.txt +++ /dev/null @@ -1,61 +0,0 @@ -Fe3Pd 3.53454303741 1 1 0 0.610901434436 13.9138648575 1 -Fe3Pt 3.53454303741 1 0 1 0.655167614325 12.97377925 1 -CrPt3 3.67481803894 1 0 1 0.619669341609 14.523874905 1 -Ni 1.89935600758 1 0 0 0.917132325319 10.903552 1 -Mn 4.20857000351 1 0 0 0.592219012066 11.8199515 1 -FePt3 3.53454303741 1 0 1 0.654602335855 14.5126277125 1 -FeCo 3.53454303741 1 0 0 0.734059888337 11.579436685 1 -AlFe3 3.53454303741 1 0 0 0.55352822994 12.1945 1 -Fe 3.53454303741 1 0 0 0.676821728076 11.8199515 1 -FeNi3 3.53454303741 1 0 0 0.847581242231 11.2083650825 1 -HfV2 2.28774404526 1 0 1 0.139474782705 33.458855975 0 -HfNi2 1.89935600758 1 0 1 0.291468422875 27.4472271167 0 -HfMo2 2.92410993576 0 1 1 0.14900548029 35.7928669667 0 -Cr2Zr 3.67481803894 1 1 0 0.170400540374 31.2988052833 0 -Cr2Ti 3.67481803894 1 0 0 0.190316268905 28.023528225 0 -Cr2Ta 3.67481803894 1 0 1 0.20160161711 28.1082401417 0 -Cr2Nb 3.67481803894 1 1 0 0.202560378073 27.9751979167 0 -HfW2 2.13846206665 0 0 1 0.146370867336 36.43712325 0 -Co2Y 2.7561609745 1 1 0 0.223929923609 31.2597793417 0 -Co2Nb 2.7561609745 1 1 0 0.295973106247 25.9032544 0 -IrZr 1.65175902843 0 1 1 0.355888707002 18.264136715 0 -IrY 1.65175902843 0 1 1 0.305312436393 19.652 0 -IrSc 1.65175902843 1 0 1 0.364158701622 16.47633291 0 -HfTc 2.99570298195 0 1 1 0.314593269655 17.4828915 0 -Ir2Sc 1.65175902843 1 0 1 0.151293799937 33.0482809083 0 -Ir2Y 1.65175902843 0 1 1 0.140753307328 35.5231439667 0 -Ir2Zr 1.65175902843 0 1 1 0.160591873676 33.210480775 0 -Ir 1.65175902843 0 0 1 0.635782877604 14.155776 0 -CoZr 2.7561609745 1 1 0 0.398968508292 16.29201269 0 -CoTi2 2.7561609745 1 0 0 0.223081584245 25.4017680833 0 -CoSc 2.7561609745 1 0 0 0.385761868518 15.553636815 0 -CoHf 2.7561609745 1 0 1 0.410425114374 15.83723747 0 -IrV3 2.28774404526 1 0 1 0.437299330113 13.7205789875 0 -IrTi3 1.65175902843 1 0 1 0.334192112374 15.7095269625 0 -IrNb3 2.32224798203 0 1 1 0.354710173223 16.9152182625 0 -Cr3Ru 3.67481803894 1 1 0 0.507626409314 12.80469235 0 -Cr3Rh 3.67481803894 1 1 0 0.528844066155 12.76368675 0 -Cr3Pt 3.67481803894 1 0 1 0.537319075144 13.027640975 0 -CoV3 2.7561609745 1 0 0 0.469782020722 12.7718808625 0 -Cr3Os 3.67481803894 1 0 1 0.262732080552 12.8457856875 0 -Cr3Ir 3.67481803894 1 0 1 0.262563878254 12.8540148875 0 -W 2.13846206665 0 0 1 0.380294459465 15.777248 0 -V 2.28774404526 1 0 0 0.363060641874 13.771804 0 -Ta 1.56566202641 0 0 1 0.27575031592 18.1323455 0 -Nb 2.32224798203 0 1 0 0.278264741075 17.9685 0 -Mo 2.92410993576 0 1 0 0.38392782157 15.6279375 0 -HfRu 1.89930105209 0 1 1 0.357760253062 16.771007815 0 -HfPt 1.00957000256 0 0 1 0.415451631385 16.8491335 0 -Cr 3.67481803894 1 0 0 0.502346965021 11.943936 0 -HfOs 2.19160604477 0 0 1 0.353141211025 16.99037046 0 -Ir3V 2.28774404526 1 0 1 0.577685281306 13.8483708325 0 -Ir3Ti 1.65175902843 1 0 1 0.545346664721 14.2111440325 0 -Ir3Ta 1.65175902843 0 0 1 0.545307648931 14.670617615 0 -Ir3Nb 2.32224798203 0 1 1 0.542371383308 14.75004074 0 -HfRh3 1.34492301941 0 1 1 0.518200999109 14.9555867575 0 -HfIr3 1.65175902843 0 0 1 0.512284018996 15.128326695 0 -Rh 1.34492301941 0 1 0 0.656072313748 13.718 0 -Pd 0.0 0 1 0 0.679533993934 14.71596725 0 -Pt 0.708856999874 0 0 1 0.66405154315 15.059072 0 -CoTi 2.7561609745 1 0 0 0.483896935837 13.43261244 0 -Cr2Hf 3.67481803894 1 0 1 0.175826907904 30.332862 0 diff --git a/rdkit/ML/CompositeRun.py b/rdkit/ML/CompositeRun.py deleted file mode 100644 index 48dcd383516..00000000000 --- a/rdkit/ML/CompositeRun.py +++ /dev/null @@ -1,248 +0,0 @@ -# $Id$ -# -# Copyright (C) 2002-2006 greg Landrum and Rational Discovery LLC -# -# @@ All Rights Reserved @@ -# This file is part of the RDKit. -# The contents are covered by the terms of the BSD license -# which is included in the file license.txt, found at the root -# of the RDKit source tree. -# -""" contains a class to store parameters for and results from -Composite building - -""" -from warnings import warn - -warn('This module is deprecated and will be removed in the 2024.03 release', DeprecationWarning, - stacklevel=2) - -from rdkit import RDConfig -from rdkit.Dbase import DbModule -from rdkit.Dbase.DbConnection import DbConnect - - -def SetDefaults(runDetails): - """ initializes a details object with default values - - **Arguments** - - - details: (optional) a _CompositeRun.CompositeRun_ object. - If this is not provided, the global _runDetails will be used. - - **Returns** - - the initialized _CompositeRun_ object. - - - """ - runDetails.nRuns = 1 - runDetails.nModels = 10 - runDetails.outName = '' - runDetails.badName = '' - runDetails.splitRun = 0 - runDetails.splitFrac = 0.7 - runDetails.lockRandom = 0 - runDetails.randomActivities = 0 - runDetails.shuffleActivities = 0 - runDetails.replacementSelection = 0 - - # - # Tree Parameters - # - runDetails.useTrees = 1 - runDetails.pruneIt = 0 - runDetails.lessGreedy = 0 - runDetails.limitDepth = -1 - runDetails.recycleVars = 0 - runDetails.randomDescriptors = 0 # toggles growing of random forests - - # - # KNN Parameters - # - runDetails.useKNN = 0 - runDetails.knnDistFunc = '' - runDetails.knnNeighs = 0 - - # - # SigTree Parameters - # - runDetails.useSigTrees = 0 - runDetails.useCMIM = 0 - runDetails.allowCollections = False - - # - # Naive Bayes Classifier Parameters - # - runDetails.useNaiveBayes = 0 - runDetails.mEstimateVal = -1.0 - runDetails.useSigBayes = 0 - - # # - # # SVM Parameters - # # - # runDetails.useSVM = 0 - # runDetails.svmKernel = SVM.radialKernel - # runDetails.svmType = SVM.cSVCType - # runDetails.svmGamma = None - # runDetails.svmCost = None - # runDetails.svmWeights = None - # runDetails.svmDataType = 'float' - # runDetails.svmDegree = 3 - # runDetails.svmCoeff = 0.0 - # runDetails.svmEps = 0.001 - # runDetails.svmNu = 0.5 - # runDetails.svmCache = 40 - # runDetails.svmShrink = 1 - # runDetails.svmDataType='float' - - runDetails.bayesModel = 0 - runDetails.dbName = '' - runDetails.dbUser = RDConfig.defaultDBUser - runDetails.dbPassword = RDConfig.defaultDBPassword - runDetails.dbWhat = '*' - runDetails.dbWhere = '' - runDetails.dbJoin = '' - runDetails.qTableName = '' - runDetails.qBounds = [] - runDetails.qBoundCount = '' - runDetails.activityBounds = [] - runDetails.activityBoundsVals = '' - runDetails.detailedRes = 0 - runDetails.noScreen = 0 - runDetails.threshold = 0.0 - runDetails.filterFrac = 0.0 - runDetails.filterVal = 0.0 - runDetails.modelFilterVal = 0.0 - runDetails.modelFilterFrac = 0.0 - runDetails.internalHoldoutFrac = 0.3 - runDetails.pickleDataFileName = '' - runDetails.startAt = None - runDetails.persistTblName = '' - runDetails.randomSeed = (23, 42) - runDetails.note = '' - - return runDetails - - -class CompositeRun: - """ class to store parameters for and results from Composite building - - This class has a default set of fields which are added to the database. - - By default these fields are stored in a tuple, so they are immutable. This - is probably what you want. - - - """ - fields = ( - ("rundate", "varchar(32)"), - ("dbName", "varchar(200)"), - ("dbWhat", "varchar(200)"), - ("dbWhere", "varchar(200)"), - ("dbJoin", "varchar(200)"), - ("tableName", "varchar(80)"), - ("note", "varchar(120)"), - ("shuffled", "smallint"), - ("randomized", "smallint"), - ("overall_error", "float"), - ("holdout_error", "float"), - ("overall_fraction_dropped", "float"), - ("holdout_fraction_dropped", "float"), - ("overall_correct_conf", "float"), - ("overall_incorrect_conf", "float"), - ("holdout_correct_conf", "float"), - ("holdout_incorrect_conf", "float"), - ("overall_result_matrix", "varchar(256)"), - ("holdout_result_matrix", "varchar(256)"), - ("threshold", "float"), - ("splitFrac", "float"), - ("filterFrac", "float"), - ("filterVal", "float"), - ("modelFilterVal", "float"), - ("modelFilterFrac", "float"), - ("nModels", "int"), - ("limitDepth", "int"), - ("bayesModels", "int"), - ("qBoundCount", "varchar(3000)"), - ("activityBoundsVals", "varchar(200)"), - ("cmd", "varchar(500)"), - ("model", DbModule.binaryTypeName), - ) - - def _CreateTable(self, cn, tblName): - """ *Internal Use only* - - """ - names = map(lambda x: x.strip().upper(), cn.GetTableNames()) - if tblName.upper() not in names: - curs = cn.GetCursor() - fmt = [] - for name, value in self.fields: - fmt.append('%s %s' % (name, value)) - fmtStr = ','.join(fmt) - curs.execute('create table %s (%s)' % (tblName, fmtStr)) - cn.Commit() - else: - heads = [x.upper() for x in cn.GetColumnNames()] - curs = cn.GetCursor() - for name, value in self.fields: - if name.upper() not in heads: - curs.execute('alter table %s add %s %s' % (tblName, name, value)) - cn.Commit() - - def Store(self, db='models.gdb', table='results', user='sysdba', password='masterkey'): - """ adds the result to a database - - **Arguments** - - - db: name of the database to use - - - table: name of the table to use - - - user&password: connection information - - """ - cn = DbConnect(db, table, user, password) - curs = cn.GetCursor() - self._CreateTable(cn, table) - - cols = [] - vals = [] - for name, _ in self.fields: - try: - v = getattr(self, name) - except AttributeError: - pass - else: - cols.append('%s' % name) - vals.append(v) - - nToDo = len(vals) - qs = ','.join([DbModule.placeHolder] * nToDo) - vals = tuple(vals) - - cmd = 'insert into %s (%s) values (%s)' % (table, ','.join(cols), qs) - curs.execute(cmd, vals) - cn.Commit() - - def GetDataSet(self, **kwargs): - """ Returns a MLDataSet pulled from a database using our stored - values. - - """ - from rdkit.ML.Data import DataUtils - data = DataUtils.DBToData(self.dbName, self.tableName, user=self.dbUser, - password=self.dbPassword, what=self.dbWhat, where=self.dbWhere, - join=self.dbJoin, **kwargs) - - return data - - def GetDataSetInfo(self, **kwargs): - """ Returns a MLDataSet pulled from a database using our stored - values. - - """ - conn = DbConnect(self.dbName, self.tableName) - res = conn.GetColumnNamesAndTypes(join=self.dbJoin, what=self.dbWhat, where=self.dbWhere) - return res diff --git a/rdkit/ML/DecTree/BuildQuantTree.py b/rdkit/ML/DecTree/BuildQuantTree.py deleted file mode 100755 index c92be04020a..00000000000 --- a/rdkit/ML/DecTree/BuildQuantTree.py +++ /dev/null @@ -1,363 +0,0 @@ -# $Id$ -# -# Copyright (C) 2001-2008 greg Landrum and Rational Discovery LLC -# All Rights Reserved -# -""" - -""" - -import numpy - -from rdkit import RDRandom as random -from rdkit.ML.Data import Quantize -from rdkit.ML.DecTree import ID3, QuantTree -from rdkit.ML.InfoTheory import entropy - - -def FindBest(resCodes, examples, nBoundsPerVar, nPossibleRes, nPossibleVals, attrs, exIndices=None, - **kwargs): - bestGain = -1e6 - best = -1 - bestBounds = [] - - if exIndices is None: - exIndices = list(range(len(examples))) - - if not len(exIndices): - return best, bestGain, bestBounds - - nToTake = kwargs.get('randomDescriptors', 0) - if nToTake > 0: - nAttrs = len(attrs) - if nToTake < nAttrs: - ids = list(range(nAttrs)) - random.shuffle(ids, random=random.random) - tmp = [attrs[x] for x in ids[:nToTake]] - attrs = tmp - - for var in attrs: - nBounds = nBoundsPerVar[var] - if nBounds > 0: - # vTable = map(lambda x,z=var:x[z],examples) - try: - vTable = [examples[x][var] for x in exIndices] - except IndexError: - print('index error retrieving variable: %d' % var) - raise - qBounds, gainHere = Quantize.FindVarMultQuantBounds(vTable, nBounds, resCodes, nPossibleRes) - # print('\tvar:',var,qBounds,gainHere) - elif nBounds == 0: - vTable = ID3.GenVarTable((examples[x] for x in exIndices), nPossibleVals, [var])[0] - gainHere = entropy.InfoGain(vTable) - qBounds = [] - else: - gainHere = -1e6 - qBounds = [] - if gainHere > bestGain: - bestGain = gainHere - bestBounds = qBounds - best = var - elif bestGain == gainHere: - if len(qBounds) < len(bestBounds): - best = var - bestBounds = qBounds - if best == -1: - print('best unaltered') - print('\tattrs:', attrs) - print('\tnBounds:', numpy.take(nBoundsPerVar, attrs)) - print('\texamples:') - for example in (examples[x] for x in exIndices): - print('\t\t', example) - - if 0: - print('BEST:', len(exIndices), best, bestGain, bestBounds) - if (len(exIndices) < 10): - print(len(exIndices), len(resCodes), len(examples)) - exs = [examples[x] for x in exIndices] - vals = [x[best] for x in exs] - sortIdx = numpy.argsort(vals) - sortVals = [exs[x] for x in sortIdx] - sortResults = [resCodes[x] for x in sortIdx] - for i in range(len(vals)): - print(' ', i, ['%.4f' % x for x in sortVals[i][1:-1]], sortResults[i]) - return best, bestGain, bestBounds - - -def BuildQuantTree(examples, target, attrs, nPossibleVals, nBoundsPerVar, depth=0, maxDepth=-1, - exIndices=None, **kwargs): - """ - **Arguments** - - - examples: a list of lists (nInstances x nVariables+1) of variable - values + instance values - - - target: an int - - - attrs: a list of ints indicating which variables can be used in the tree - - - nPossibleVals: a list containing the number of possible values of - every variable. - - - nBoundsPerVar: the number of bounds to include for each variable - - - depth: (optional) the current depth in the tree - - - maxDepth: (optional) the maximum depth to which the tree - will be grown - **Returns** - - a QuantTree.QuantTreeNode with the decision tree - - **NOTE:** This code cannot bootstrap (start from nothing...) - use _QuantTreeBoot_ (below) for that. - """ - tree = QuantTree.QuantTreeNode(None, 'node') - tree.SetData(-666) - nPossibleRes = nPossibleVals[-1] - - if exIndices is None: - exIndices = list(range(len(examples))) - - # counts of each result code: - resCodes = [int(x[-1]) for x in (examples[y] for y in exIndices)] - counts = [0] * nPossibleRes - for res in resCodes: - counts[res] += 1 - nzCounts = numpy.nonzero(counts)[0] - - if len(nzCounts) == 1: - # bottomed out because there is only one result code left - # with any counts (i.e. there's only one type of example - # left... this is GOOD!). - res = nzCounts[0] - tree.SetLabel(res) - tree.SetName(str(res)) - tree.SetTerminal(1) - elif len(attrs) == 0 or (maxDepth >= 0 and depth > maxDepth): - # Bottomed out: no variables left or max depth hit - # We don't really know what to do here, so - # use the heuristic of picking the most prevalent - # result - v = numpy.argmax(counts) - tree.SetLabel(v) - tree.SetName('%d?' % v) - tree.SetTerminal(1) - else: - # find the variable which gives us the largest information gain - best, _, bestBounds = FindBest(resCodes, examples, nBoundsPerVar, nPossibleRes, nPossibleVals, - attrs, exIndices=exIndices, **kwargs) - # remove that variable from the lists of possible variables - nextAttrs = attrs[:] - if not kwargs.get('recycleVars', 0): - nextAttrs.remove(best) - - # set some info at this node - tree.SetName('Var: %d' % (best)) - tree.SetLabel(best) - tree.SetQuantBounds(bestBounds) - tree.SetTerminal(0) - - # loop over possible values of the new variable and - # build a subtree for each one - indices = exIndices[:] - if len(bestBounds) > 0: - for bound in bestBounds: - nextExamples = [] - for index in indices[:]: - ex = examples[index] - if ex[best] < bound: - nextExamples.append(index) - indices.remove(index) - - if len(nextExamples) == 0: - # this particular value of the variable has no examples, - # so there's not much sense in recursing. - # This can (and does) happen. - v = numpy.argmax(counts) - tree.AddChild('%d' % v, label=v, data=0.0, isTerminal=1) - else: - # recurse - tree.AddChildNode( - BuildQuantTree(examples, best, nextAttrs, nPossibleVals, nBoundsPerVar, depth=depth + 1, - maxDepth=maxDepth, exIndices=nextExamples, **kwargs)) - # add the last points remaining - nextExamples = [] - for index in indices: - nextExamples.append(index) - if len(nextExamples) == 0: - v = numpy.argmax(counts) - tree.AddChild('%d' % v, label=v, data=0.0, isTerminal=1) - else: - tree.AddChildNode( - BuildQuantTree(examples, best, nextAttrs, nPossibleVals, nBoundsPerVar, depth=depth + 1, - maxDepth=maxDepth, exIndices=nextExamples, **kwargs)) - else: - for val in range(nPossibleVals[best]): - nextExamples = [] - for idx in exIndices: - if examples[idx][best] == val: - nextExamples.append(idx) - if len(nextExamples) == 0: - v = numpy.argmax(counts) - tree.AddChild('%d' % v, label=v, data=0.0, isTerminal=1) - else: - tree.AddChildNode( - BuildQuantTree(examples, best, nextAttrs, nPossibleVals, nBoundsPerVar, depth=depth + 1, - maxDepth=maxDepth, exIndices=nextExamples, **kwargs)) - return tree - - -def QuantTreeBoot(examples, attrs, nPossibleVals, nBoundsPerVar, initialVar=None, maxDepth=-1, - **kwargs): - """ Bootstrapping code for the QuantTree - - If _initialVar_ is not set, the algorithm will automatically - choose the first variable in the tree (the standard greedy - approach). Otherwise, _initialVar_ will be used as the first - split. - - """ - attrs = list(attrs) - for i in range(len(nBoundsPerVar)): - if nBoundsPerVar[i] == -1 and i in attrs: - attrs.remove(i) - - tree = QuantTree.QuantTreeNode(None, 'node') - nPossibleRes = nPossibleVals[-1] - tree._nResultCodes = nPossibleRes - - resCodes = [int(x[-1]) for x in examples] - counts = [0] * nPossibleRes - for res in resCodes: - counts[res] += 1 - if initialVar is None: - best, gainHere, qBounds = FindBest(resCodes, examples, nBoundsPerVar, nPossibleRes, - nPossibleVals, attrs, **kwargs) - else: - best = initialVar - if nBoundsPerVar[best] > 0: - vTable = map(lambda x, z=best: x[z], examples) - qBounds, gainHere = Quantize.FindVarMultQuantBounds(vTable, nBoundsPerVar[best], resCodes, - nPossibleRes) - elif nBoundsPerVar[best] == 0: - vTable = ID3.GenVarTable(examples, nPossibleVals, [best])[0] - gainHere = entropy.InfoGain(vTable) - qBounds = [] - else: - gainHere = -1e6 - qBounds = [] - - tree.SetName('Var: %d' % (best)) - tree.SetData(gainHere) - tree.SetLabel(best) - tree.SetTerminal(0) - tree.SetQuantBounds(qBounds) - nextAttrs = list(attrs) - if not kwargs.get('recycleVars', 0): - nextAttrs.remove(best) - - indices = list(range(len(examples))) - if len(qBounds) > 0: - for bound in qBounds: - nextExamples = [] - for index in list(indices): - ex = examples[index] - if ex[best] < bound: - nextExamples.append(ex) - indices.remove(index) - - if len(nextExamples): - tree.AddChildNode( - BuildQuantTree(nextExamples, best, nextAttrs, nPossibleVals, nBoundsPerVar, depth=1, - maxDepth=maxDepth, **kwargs)) - else: - v = numpy.argmax(counts) - tree.AddChild('%d??' % (v), label=v, data=0.0, isTerminal=1) - # add the last points remaining - nextExamples = [] - for index in indices: - nextExamples.append(examples[index]) - if len(nextExamples) != 0: - tree.AddChildNode( - BuildQuantTree(nextExamples, best, nextAttrs, nPossibleVals, nBoundsPerVar, depth=1, - maxDepth=maxDepth, **kwargs)) - else: - v = numpy.argmax(counts) - tree.AddChild('%d??' % (v), label=v, data=0.0, isTerminal=1) - else: - for val in range(nPossibleVals[best]): - nextExamples = [] - for example in examples: - if example[best] == val: - nextExamples.append(example) - if len(nextExamples) != 0: - tree.AddChildNode( - BuildQuantTree(nextExamples, best, nextAttrs, nPossibleVals, nBoundsPerVar, depth=1, - maxDepth=maxDepth, **kwargs)) - else: - v = numpy.argmax(counts) - tree.AddChild('%d??' % (v), label=v, data=0.0, isTerminal=1) - return tree - - -def TestTree(): - """ testing code for named trees - - """ - examples1 = [['p1', 0, 1, 0, 0], ['p2', 0, 0, 0, 1], ['p3', 0, 0, 1, 2], ['p4', 0, 1, 1, 2], - ['p5', 1, 0, 0, 2], ['p6', 1, 0, 1, 2], ['p7', 1, 1, 0, 2], ['p8', 1, 1, 1, 0]] - attrs = list(range(1, len(examples1[0]) - 1)) - nPossibleVals = [0, 2, 2, 2, 3] - t1 = ID3.ID3Boot(examples1, attrs, nPossibleVals, maxDepth=1) - t1.Print() - - -def TestQuantTree(): # pragma: nocover - """ Testing code for named trees - - The created pkl file is required by the unit test code. - """ - examples1 = [['p1', 0, 1, 0.1, 0], ['p2', 0, 0, 0.1, 1], ['p3', 0, 0, 1.1, - 2], ['p4', 0, 1, 1.1, 2], - ['p5', 1, 0, 0.1, 2], ['p6', 1, 0, 1.1, 2], ['p7', 1, 1, 0.1, 2], - ['p8', 1, 1, 1.1, 0]] - attrs = list(range(1, len(examples1[0]) - 1)) - nPossibleVals = [0, 2, 2, 0, 3] - boundsPerVar = [0, 0, 0, 1, 0] - - print('base') - t1 = QuantTreeBoot(examples1, attrs, nPossibleVals, boundsPerVar) - t1.Pickle('test_data/QuantTree1.pkl') - t1.Print() - - print('depth limit') - t1 = QuantTreeBoot(examples1, attrs, nPossibleVals, boundsPerVar, maxDepth=1) - t1.Pickle('test_data/QuantTree1.pkl') - t1.Print() - - -def TestQuantTree2(): # pragma: nocover - """ testing code for named trees - - The created pkl file is required by the unit test code. - """ - examples1 = [['p1', 0.1, 1, 0.1, 0], ['p2', 0.1, 0, 0.1, 1], ['p3', 0.1, 0, 1.1, 2], - ['p4', 0.1, 1, 1.1, 2], ['p5', 1.1, 0, 0.1, 2], ['p6', 1.1, 0, 1.1, 2], - ['p7', 1.1, 1, 0.1, 2], ['p8', 1.1, 1, 1.1, 0]] - attrs = list(range(1, len(examples1[0]) - 1)) - nPossibleVals = [0, 0, 2, 0, 3] - boundsPerVar = [0, 1, 0, 1, 0] - - t1 = QuantTreeBoot(examples1, attrs, nPossibleVals, boundsPerVar) - t1.Print() - t1.Pickle('test_data/QuantTree2.pkl') - - for example in examples1: - print(example, t1.ClassifyExample(example)) - - -if __name__ == "__main__": # pragma: nocover - TestTree() - TestQuantTree() - # TestQuantTree2() diff --git a/rdkit/ML/DecTree/BuildSigTree.py b/rdkit/ML/DecTree/BuildSigTree.py deleted file mode 100644 index 02373de1623..00000000000 --- a/rdkit/ML/DecTree/BuildSigTree.py +++ /dev/null @@ -1,231 +0,0 @@ -# $Id$ -# -# Copyright (C) 2003-2008 Greg Landrum and Rational Discovery LLC -# All Rights Reserved -# -""" - -""" - -import copy -import random - -import numpy - -from rdkit.DataStructs.VectCollection import VectCollection -from rdkit.ML import InfoTheory -from rdkit.ML.DecTree import SigTree - -try: - from rdkit.ML.FeatureSelect import CMIM -except ImportError: - CMIM = None - - -def _GenerateRandomEnsemble(nToInclude, nBits): - """ Generates a random subset of a group of indices - - **Arguments** - - - nToInclude: the size of the desired set - - - nBits: the maximum index to be included in the set - - **Returns** - - a list of indices - - """ - # Before Python 2.3 added the random.sample() function, this was - # way more complicated: - return random.sample(range(nBits), nToInclude) - - -def BuildSigTree(examples, nPossibleRes, ensemble=None, random=0, - metric=InfoTheory.InfoType.BIASENTROPY, biasList=[1], depth=0, maxDepth=-1, - useCMIM=0, allowCollections=False, verbose=0, **kwargs): - """ - **Arguments** - - - examples: the examples to be classified. Each example - should be a sequence at least three entries long, with - entry 0 being a label, entry 1 a BitVector and entry -1 - an activity value - - - nPossibleRes: the number of result codes possible - - - ensemble: (optional) if this argument is provided, it - should be a sequence which is used to limit the bits - which are actually considered as potential descriptors. - The default is None (use all bits). - - - random: (optional) If this argument is nonzero, it - specifies the number of bits to be randomly selected - for consideration at this node (i.e. this toggles the - growth of Random Trees). - The default is 0 (no random descriptor selection) - - - metric: (optional) This is an _InfoTheory.InfoType_ and - sets the metric used to rank the bits. - The default is _InfoTheory.InfoType.BIASENTROPY_ - - - biasList: (optional) If provided, this provides a bias - list for the bit ranker. - See the _InfoTheory.InfoBitRanker_ docs for an explanation - of bias. - The default value is [1], which biases towards actives. - - - maxDepth: (optional) the maximum depth to which the tree - will be grown - The default is -1 (no depth limit). - - - useCMIM: (optional) if this is >0, the CMIM algorithm - (conditional mutual information maximization) will be - used to select the descriptors used to build the trees. - The value of the variable should be set to the number - of descriptors to be used. This option and the - ensemble option are mutually exclusive (CMIM will not be - used if the ensemble is set), but it happily coexsts - with the random argument (to only consider random subsets - of the top N CMIM bits) - The default is 0 (do not use CMIM) - - - depth: (optional) the current depth in the tree - This is used in the recursion and should not be set - by the client. - - **Returns** - - a SigTree.SigTreeNode with the root of the decision tree - - """ - if verbose: - print(' ' * depth, 'Build') - tree = SigTree.SigTreeNode(None, 'node', level=depth) - tree.SetData(-666) - # tree.SetExamples(examples) - - # counts of each result code: - # resCodes = map(lambda x:int(x[-1]),examples) - resCodes = [int(x[-1]) for x in examples] - # print('resCodes:',resCodes) - counts = [0] * nPossibleRes - for res in resCodes: - counts[res] += 1 - # print(' '*depth,'counts:',counts) - - nzCounts = numpy.nonzero(counts)[0] - if verbose: - print(' ' * depth, '\tcounts:', counts) - if len(nzCounts) == 1: - # bottomed out because there is only one result code left - # with any counts (i.e. there's only one type of example - # left... this is GOOD!). - res = nzCounts[0] - tree.SetLabel(res) - tree.SetName(str(res)) - tree.SetTerminal(1) - elif maxDepth >= 0 and depth > maxDepth: - # Bottomed out: max depth hit - # We don't really know what to do here, so - # use the heuristic of picking the most prevalent - # result - v = numpy.argmax(counts) - tree.SetLabel(v) - tree.SetName('%d?' % v) - tree.SetTerminal(1) - else: - # find the variable which gives us the best improvement - # We do this with an InfoBitRanker: - fp = examples[0][1] - nBits = fp.GetNumBits() - ranker = InfoTheory.InfoBitRanker(nBits, nPossibleRes, metric) - if biasList: - ranker.SetBiasList(biasList) - if CMIM is not None and useCMIM > 0 and not ensemble: - ensemble = CMIM.SelectFeatures(examples, useCMIM, bvCol=1) - if random: - if ensemble: - if len(ensemble) > random: - picks = _GenerateRandomEnsemble(random, len(ensemble)) - availBits = list(numpy.take(ensemble, picks)) - else: - availBits = list(range(len(ensemble))) - else: - availBits = _GenerateRandomEnsemble(random, nBits) - else: - availBits = None - if availBits: - ranker.SetMaskBits(availBits) - # print(' 2:'*depth,availBits) - - useCollections = isinstance(examples[0][1], VectCollection) - for example in examples: - # print(' '*depth,example[1].ToBitString(),example[-1]) - if not useCollections: - ranker.AccumulateVotes(example[1], example[-1]) - else: - example[1].Reset() - ranker.AccumulateVotes(example[1].orVect, example[-1]) - - try: - bitInfo = ranker.GetTopN(1)[0] - best = int(bitInfo[0]) - gain = bitInfo[1] - except Exception: - import traceback - traceback.print_exc() - print('get top n failed') - gain = -1.0 - if gain <= 0.0: - v = numpy.argmax(counts) - tree.SetLabel(v) - tree.SetName('?%d?' % v) - tree.SetTerminal(1) - return tree - best = int(bitInfo[0]) - # print(' '*depth,'\tbest:',bitInfo) - if verbose: - print(' ' * depth, '\tbest:', bitInfo) - # set some info at this node - tree.SetName('Bit-%d' % (best)) - tree.SetLabel(best) - # tree.SetExamples(examples) - tree.SetTerminal(0) - - # loop over possible values of the new variable and - # build a subtree for each one - onExamples = [] - offExamples = [] - for example in examples: - if example[1][best]: - if allowCollections and useCollections: - sig = copy.copy(example[1]) - sig.DetachVectsNotMatchingBit(best) - ex = [example[0], sig] - if len(example) > 2: - ex.extend(example[2:]) - example = ex - onExamples.append(example) - else: - offExamples.append(example) - # print(' '*depth,len(offExamples),len(onExamples)) - for ex in (offExamples, onExamples): - if len(ex) == 0: - v = numpy.argmax(counts) - tree.AddChild('%d??' % v, label=v, data=0.0, isTerminal=1) - else: - child = BuildSigTree(ex, nPossibleRes, random=random, ensemble=ensemble, metric=metric, - biasList=biasList, depth=depth + 1, maxDepth=maxDepth, verbose=verbose) - if child is None: - v = numpy.argmax(counts) - tree.AddChild('%d???' % v, label=v, data=0.0, isTerminal=1) - else: - tree.AddChildNode(child) - return tree - - -def SigTreeBuilder(examples, attrs, nPossibleVals, initialVar=None, ensemble=None, - randomDescriptors=0, **kwargs): - nRes = nPossibleVals[-1] - return BuildSigTree(examples, nRes, random=randomDescriptors, **kwargs) diff --git a/rdkit/ML/DecTree/CrossValidate.py b/rdkit/ML/DecTree/CrossValidate.py deleted file mode 100755 index 7f0aa702004..00000000000 --- a/rdkit/ML/DecTree/CrossValidate.py +++ /dev/null @@ -1,216 +0,0 @@ -# -# Copyright (C) 2000 greg Landrum -# -""" handles doing cross validation with decision trees - -This is, perhaps, a little misleading. For the purposes of this module, -cross validation == evaluating the accuracy of a tree. - - -""" - -import numpy - -from rdkit.ML.Data import SplitData -from rdkit.ML.DecTree import ID3, randomtest - - -def ChooseOptimalRoot(examples, trainExamples, testExamples, attrs, nPossibleVals, treeBuilder, - nQuantBounds=[], **kwargs): - """ loops through all possible tree roots and chooses the one which produces the best tree - - **Arguments** - - - examples: the full set of examples - - - trainExamples: the training examples - - - testExamples: the testing examples - - - attrs: a list of attributes to consider in the tree building - - - nPossibleVals: a list of the number of possible values each variable can adopt - - - treeBuilder: the function to be used to actually build the tree - - - nQuantBounds: an optional list. If present, it's assumed that the builder - algorithm takes this argument as well (for building QuantTrees) - - **Returns** - - The best tree found - - **Notes** - - 1) Trees are built using _trainExamples_ - - 2) Testing of each tree (to determine which is best) is done using _CrossValidate_ and - the entire set of data (i.e. all of _examples_) - - 3) _trainExamples_ is not used at all, which immediately raises the question of - why it's even being passed in - - """ - attrs = attrs[:] - if nQuantBounds: - for i in range(len(nQuantBounds)): - if nQuantBounds[i] == -1 and i in attrs: - attrs.remove(i) - nAttrs = len(attrs) - trees = [None] * nAttrs - errs = [0] * nAttrs - errs[0] = 1e6 - - for i in range(1, nAttrs): - argD = {'initialVar': attrs[i]} - argD.update(kwargs) - if nQuantBounds is None or nQuantBounds == []: - trees[i] = treeBuilder(trainExamples, attrs, nPossibleVals, **argD) - else: - trees[i] = treeBuilder(trainExamples, attrs, nPossibleVals, nQuantBounds, **argD) - if trees[i]: - errs[i], _ = CrossValidate(trees[i], examples, appendExamples=0) - else: - errs[i] = 1e6 - best = numpy.argmin(errs) - # FIX: this used to say 'trees[i]', could that possibly have been right? - return trees[best] - - -def CrossValidate(tree, testExamples, appendExamples=0): - """ Determines the classification error for the testExamples - - **Arguments** - - - tree: a decision tree (or anything supporting a _ClassifyExample()_ method) - - - testExamples: a list of examples to be used for testing - - - appendExamples: a toggle which is passed along to the tree as it does - the classification. The trees can use this to store the examples they - classify locally. - - **Returns** - - a 2-tuple consisting of: - - 1) the percent error of the tree - - 2) a list of misclassified examples - - """ - nTest = len(testExamples) - nBad = 0 - badExamples = [] - for i in range(nTest): - testEx = testExamples[i] - trueRes = testEx[-1] - res = tree.ClassifyExample(testEx, appendExamples) - if (trueRes != res).any(): - badExamples.append(testEx) - nBad += 1 - - return float(nBad) / nTest, badExamples - - -def CrossValidationDriver(examples, attrs, nPossibleVals, holdOutFrac=.3, silent=0, - calcTotalError=0, treeBuilder=ID3.ID3Boot, lessGreedy=0, startAt=None, - nQuantBounds=[], maxDepth=-1, **kwargs): - """ Driver function for building trees and doing cross validation - - **Arguments** - - - examples: the full set of examples - - - attrs: a list of attributes to consider in the tree building - - - nPossibleVals: a list of the number of possible values each variable can adopt - - - holdOutFrac: the fraction of the data which should be reserved for the hold-out set - (used to calculate the error) - - - silent: a toggle used to control how much visual noise this makes as it goes. - - - calcTotalError: a toggle used to indicate whether the classification error - of the tree should be calculated using the entire data set (when true) or just - the training hold out set (when false) - - - treeBuilder: the function to call to build the tree - - - lessGreedy: toggles use of the less greedy tree growth algorithm (see - _ChooseOptimalRoot_). - - - startAt: forces the tree to be rooted at this descriptor - - - nQuantBounds: an optional list. If present, it's assumed that the builder - algorithm takes this argument as well (for building QuantTrees) - - - maxDepth: an optional integer. If present, it's assumed that the builder - algorithm takes this argument as well - - **Returns** - - a 2-tuple containing: - - 1) the tree - - 2) the cross-validation error of the tree - - """ - nTot = len(examples) - if not kwargs.get('replacementSelection', 0): - testIndices, trainIndices = SplitData.SplitIndices(nTot, holdOutFrac, silent=1, legacy=1, - replacement=0) - else: - testIndices, trainIndices = SplitData.SplitIndices(nTot, holdOutFrac, silent=1, legacy=0, - replacement=1) - trainExamples = [examples[x] for x in trainIndices] - testExamples = [examples[x] for x in testIndices] - - nTrain = len(trainExamples) - if not silent: - print('Training with %d examples' % (nTrain)) - - if not lessGreedy: - if nQuantBounds is None or nQuantBounds == []: - tree = treeBuilder(trainExamples, attrs, nPossibleVals, initialVar=startAt, maxDepth=maxDepth, - **kwargs) - else: - tree = treeBuilder(trainExamples, attrs, nPossibleVals, nQuantBounds, initialVar=startAt, - maxDepth=maxDepth, **kwargs) - else: - tree = ChooseOptimalRoot(examples, trainExamples, testExamples, attrs, nPossibleVals, - treeBuilder, nQuantBounds, maxDepth=maxDepth, **kwargs) - - nTest = len(testExamples) - if not silent: - print('Testing with %d examples' % nTest) - if not calcTotalError: - xValError, badExamples = CrossValidate(tree, testExamples, appendExamples=1) - else: - xValError, badExamples = CrossValidate(tree, examples, appendExamples=0) - if not silent: - print('Validation error was %%%4.2f' % (100 * xValError)) - tree.SetBadExamples(badExamples) - tree.SetTrainingExamples(trainExamples) - tree.SetTestExamples(testExamples) - tree._trainIndices = trainIndices - return tree, xValError - - -def TestRun(): - """ testing code """ - examples, attrs, nPossibleVals = randomtest.GenRandomExamples(nExamples=200) - tree, _ = CrossValidationDriver(examples, attrs, nPossibleVals) - - tree.Pickle('save.pkl') - - import copy - t2 = copy.deepcopy(tree) - print('t1 == t2', tree == t2) - l = [tree] - print('t2 in [tree]', t2 in l, l.index(t2)) - - -if __name__ == '__main__': # pragma: nocover - TestRun() diff --git a/rdkit/ML/DecTree/DecTree.py b/rdkit/ML/DecTree/DecTree.py deleted file mode 100644 index 2f3c1720182..00000000000 --- a/rdkit/ML/DecTree/DecTree.py +++ /dev/null @@ -1,122 +0,0 @@ -# -# Copyright (C) 2000-2004 greg Landrum and Rational Discovery LLC -# All Rights Reserved -# -""" Defines the class _DecTreeNode_, used to represent decision trees - - _DecTreeNode_ is derived from _Tree.TreeNode_ - -""" -from rdkit.ML.DecTree import Tree - - -class DecTreeNode(Tree.TreeNode): - """ This is used to represent decision trees - - _DecTreeNode_s are simultaneously the roots and branches of decision trees. - Everything is nice and recursive. - - _DecTreeNode_s can save the following pieces of internal state, accessible via - standard setter/getter functions: - - 1) _Examples_: a list of examples which have been classified - - 2) _BadExamples_: a list of examples which have been misclassified - - 3) _TrainingExamples_: the list of examples used to train the tree - - 4) _TestExamples_: the list of examples used to test the tree - - """ - - def __init__(self, *args, **kwargs): - # apply(Tree.TreeNode.__init__,(self,)+args,kwargs) - Tree.TreeNode.__init__(self, *args, **kwargs) - self.examples = [] - self.badExamples = [] - self.trainingExamples = [] - self.testExamples = [] - - def ClassifyExample(self, example, appendExamples=0): - """ Recursively classify an example by running it through the tree - - **Arguments** - - - example: the example to be classified - - - appendExamples: if this is nonzero then this node (and all children) - will store the example - - **Returns** - - the classification of _example_ - - **NOTE:** - In the interest of speed, I don't use accessor functions - here. So if you subclass DecTreeNode for your own trees, you'll - have to either include ClassifyExample or avoid changing the names - of the instance variables this needs. - - """ - if appendExamples: - self.examples.append(example) - if self.terminalNode: - return self.label - else: - val = example[self.label] - return self.children[val].ClassifyExample(example, appendExamples) - - def AddChild(self, name, label=None, data=None, isTerminal=0): - """ Constructs and adds a child with the specified data to our list - - **Arguments** - - - name: the name of the new node - - - label: the label of the new node (should be an integer) - - - data: the data to be stored in the new node - - - isTerminal: a toggle to indicate whether or not the new node is - a terminal (leaf) node. - - **Returns* - - the _DecTreeNode_ which is constructed - - """ - child = DecTreeNode(self, name, label, data, level=self.level + 1, isTerminal=isTerminal) - self.children.append(child) - return child - - def GetExamples(self): - return self.examples - - def SetExamples(self, examples): - self.examples = examples - - def GetBadExamples(self): - return self.badExamples - - def SetBadExamples(self, examples): - self.badExamples = examples - - def GetTrainingExamples(self): - return self.trainingExamples - - def SetTrainingExamples(self, examples): - self.trainingExamples = examples - - def GetTestExamples(self): - return self.testExamples - - def SetTestExamples(self, examples): - self.testExamples = examples - - def ClearExamples(self): - self.examples = [] - self.badExamples = [] - self.trainingExamples = [] - self.testExamples = [] - for child in self.GetChildren(): - child.ClearExamples() diff --git a/rdkit/ML/DecTree/Forest.py b/rdkit/ML/DecTree/Forest.py deleted file mode 100755 index dcd50daef29..00000000000 --- a/rdkit/ML/DecTree/Forest.py +++ /dev/null @@ -1,302 +0,0 @@ -# -# Copyright (C) 2000-2008 greg Landrum -# -""" code for dealing with forests (collections) of decision trees - -**NOTE** This code should be obsolete now that ML.Composite.Composite is up and running. - -""" - -import pickle - -import numpy - -from rdkit.ML.DecTree import CrossValidate, PruneTree - - -class Forest(object): - """a forest of unique decision trees. - - adding an existing tree just results in its count field being incremented - and the errors being averaged. - - typical usage: - - 1) grow the forest with AddTree until happy with it - - 2) call AverageErrors to calculate the average error values - - 3) call SortTrees to put things in order by either error or count - - """ - - def MakeHistogram(self): - """ creates a histogram of error/count pairs - - """ - nExamples = len(self.treeList) - histo = [] - i = 1 - lastErr = self.errList[0] - countHere = self.countList[0] - eps = 0.001 - while i < nExamples: - if self.errList[i] - lastErr > eps: - histo.append((lastErr, countHere)) - lastErr = self.errList[i] - countHere = self.countList[i] - else: - countHere = countHere + self.countList[i] - i = i + 1 - - return histo - - def CollectVotes(self, example): - """ collects votes across every member of the forest for the given example - - **Returns** - - a list of the results - - """ - nTrees = len(self.treeList) - votes = [0] * nTrees - for i in range(nTrees): - votes[i] = self.treeList[i].ClassifyExample(example) - return votes - - def ClassifyExample(self, example): - """ classifies the given example using the entire forest - - **returns** a result and a measure of confidence in it. - - **FIX:** statistics sucks... I'm not seeing an obvious way to get - the confidence intervals. For that matter, I'm not seeing - an unobvious way. - - For now, this is just treated as a voting problem with the confidence - measure being the percent of trees which voted for the winning result. - """ - self.treeVotes = self.CollectVotes(example) - votes = [0] * len(self._nPossible) - for i in range(len(self.treeList)): - res = self.treeVotes[i] - votes[res] = votes[res] + self.countList[i] - - totVotes = sum(votes) - res = numpy.argmax(votes) - # print 'v:',res,votes,totVotes - return res, float(votes[res]) / float(totVotes) - - def GetVoteDetails(self): - """ Returns the details of the last vote the forest conducted - - this will be an empty list if no voting has yet been done - - """ - return self.treeVotes - - def Grow(self, examples, attrs, nPossibleVals, nTries=10, pruneIt=0, lessGreedy=0): - """ Grows the forest by adding trees - - **Arguments** - - - examples: the examples to be used for training - - - attrs: a list of the attributes to be used in training - - - nPossibleVals: a list with the number of possible values each variable - (as well as the result) can take on - - - nTries: the number of new trees to add - - - pruneIt: a toggle for whether or not the tree should be pruned - - - lessGreedy: toggles the use of a less greedy construction algorithm where - each possible tree root is used. The best tree from each step is actually - added to the forest. - - """ - self._nPossible = nPossibleVals - for i in range(nTries): - tree, frac = CrossValidate.CrossValidationDriver(examples, attrs, nPossibleVals, silent=1, - calcTotalError=1, lessGreedy=lessGreedy) - if pruneIt: - tree, frac2 = PruneTree.PruneTree(tree, tree.GetTrainingExamples(), tree.GetTestExamples(), - minimizeTestErrorOnly=0) - print('prune: ', frac, frac2) - frac = frac2 - self.AddTree(tree, frac) - if i % (nTries / 10) == 0: - print('Cycle: % 4d' % (i)) - - def Pickle(self, fileName='foo.pkl'): - """ Writes this forest off to a file so that it can be easily loaded later - - **Arguments** - - fileName is the name of the file to be written - - """ - pFile = open(fileName, 'wb+') - pickle.dump(self, pFile, 1) - pFile.close() - - def AddTree(self, tree, error): - """ Adds a tree to the forest - - If an identical tree is already present, its count is incremented - - **Arguments** - - - tree: the new tree - - - error: its error value - - **NOTE:** the errList is run as an accumulator, - you probably want to call AverageErrors after finishing the forest - - """ - if tree in self.treeList: - idx = self.treeList.index(tree) - self.errList[idx] = self.errList[idx] + error - self.countList[idx] = self.countList[idx] + 1 - else: - self.treeList.append(tree) - self.errList.append(error) - self.countList.append(1) - - def AverageErrors(self): - """ convert summed error to average error - - This does the conversion in place - """ - self.errList = [x / y for x, y in zip(self.errList, self.countList)] - - def SortTrees(self, sortOnError=1): - """ sorts the list of trees - - **Arguments** - - sortOnError: toggles sorting on the trees' errors rather than their counts - - """ - if sortOnError: - order = numpy.argsort(self.errList) - else: - order = numpy.argsort(self.countList) - - # these elaborate contortions are required because, at the time this - # code was written, Numeric arrays didn't unpickle so well... - self.treeList = [self.treeList[x] for x in order] - self.countList = [self.countList[x] for x in order] - self.errList = [self.errList[x] for x in order] - - def GetTree(self, i): - return self.treeList[i] - - def SetTree(self, i, val): - self.treeList[i] = val - - def GetCount(self, i): - return self.countList[i] - - def SetCount(self, i, val): - self.countList[i] = val - - def GetError(self, i): - return self.errList[i] - - def SetError(self, i, val): - self.errList[i] = val - - def GetDataTuple(self, i): - """ returns all relevant data about a particular tree in the forest - - **Arguments** - - i: an integer indicating which tree should be returned - - **Returns** - - a 3-tuple consisting of: - - 1) the tree - - 2) its count - - 3) its error - """ - return (self.treeList[i], self.countList[i], self.errList[i]) - - def SetDataTuple(self, i, tup): - """ sets all relevant data for a particular tree in the forest - - **Arguments** - - - i: an integer indicating which tree should be returned - - - tup: a 3-tuple consisting of: - - 1) the tree - - 2) its count - - 3) its error - """ - self.treeList[i], self.countList[i], self.errList[i] = tup - - def GetAllData(self): - """ Returns everything we know - - **Returns** - - a 3-tuple consisting of: - - 1) our list of trees - - 2) our list of tree counts - - 3) our list of tree errors - - """ - return (self.treeList, self.countList, self.errList) - - def __len__(self): - """ allows len(forest) to work - - """ - return len(self.treeList) - - def __getitem__(self, which): - """ allows forest[i] to work. return the data tuple - - """ - return self.GetDataTuple(which) - - def __str__(self): - """ allows the forest to show itself as a string - - """ - outStr = 'Forest\n' - for i in range(len(self.treeList)): - outStr = (outStr + ' Tree % 4d: % 5d occurrences %%% 5.2f average error\n' % - (i, self.countList[i], 100. * self.errList[i])) - return outStr - - def __init__(self): - self.treeList = [] - self.errList = [] - self.countList = [] - self.treeVotes = [] - - -if __name__ == '__main__': - from rdkit.ML.DecTree import DecTree - f = Forest() - n = DecTree.DecTreeNode(None, 'foo') - f.AddTree(n, 0.5) - f.AddTree(n, 0.5) - f.AverageErrors() - f.SortTrees() - print(f) diff --git a/rdkit/ML/DecTree/ID3.py b/rdkit/ML/DecTree/ID3.py deleted file mode 100644 index 476491e2466..00000000000 --- a/rdkit/ML/DecTree/ID3.py +++ /dev/null @@ -1,219 +0,0 @@ -# -# Copyright (C) 2000-2008 greg Landrum and Rational Discovery LLC -# -""" ID3 Decision Trees - - contains an implementation of the ID3 decision tree algorithm - as described in Tom Mitchell's book "Machine Learning" - - It relies upon the _Tree.TreeNode_ data structure (or something - with the same API) defined locally to represent the trees - -""" - -import numpy - -from rdkit.ML.DecTree import DecTree -from rdkit.ML.InfoTheory import entropy - - -def CalcTotalEntropy(examples, nPossibleVals): - """ Calculates the total entropy of the data set (w.r.t. the results) - - **Arguments** - - - examples: a list (nInstances long) of lists of variable values + instance - values - - nPossibleVals: a list (nVars long) of the number of possible values each variable - can adopt. - - **Returns** - - a float containing the informational entropy of the data set. - - """ - nRes = nPossibleVals[-1] - resList = numpy.zeros(nRes, 'i') - for example in examples: - res = int(example[-1]) - resList[res] += 1 - return entropy.InfoEntropy(resList) - - -def GenVarTable(examples, nPossibleVals, vars): - """Generates a list of variable tables for the examples passed in. - - The table for a given variable records the number of times each possible value - of that variable appears for each possible result of the function. - - **Arguments** - - - examples: a list (nInstances long) of lists of variable values + instance - values - - - nPossibleVals: a list containing the number of possible values of - each variable + the number of values of the function. - - - vars: a list of the variables to include in the var table - - - **Returns** - - a list of variable result tables. Each table is a Numeric array - which is varValues x nResults - """ - nVars = len(vars) - res = [None] * nVars - nFuncVals = nPossibleVals[-1] - - for i in range(nVars): - res[i] = numpy.zeros((nPossibleVals[vars[i]], nFuncVals), 'i') - for example in examples: - val = int(example[-1]) - for i in range(nVars): - res[i][int(example[vars[i]]), val] += 1 - - return res - - -def ID3(examples, target, attrs, nPossibleVals, depth=0, maxDepth=-1, **kwargs): - """ Implements the ID3 algorithm for constructing decision trees. - - From Mitchell's book, page 56 - - This is *slightly* modified from Mitchell's book because it supports - multivalued (non-binary) results. - - **Arguments** - - - examples: a list (nInstances long) of lists of variable values + instance - values - - - target: an int - - - attrs: a list of ints indicating which variables can be used in the tree - - - nPossibleVals: a list containing the number of possible values of - every variable. - - - depth: (optional) the current depth in the tree - - - maxDepth: (optional) the maximum depth to which the tree - will be grown - - **Returns** - - a DecTree.DecTreeNode with the decision tree - - **NOTE:** This code cannot bootstrap (start from nothing...) - use _ID3Boot_ (below) for that. - """ - varTable = GenVarTable(examples, nPossibleVals, attrs) - tree = DecTree.DecTreeNode(None, 'node') - - # store the total entropy... in case that is interesting - totEntropy = CalcTotalEntropy(examples, nPossibleVals) - tree.SetData(totEntropy) - # tree.SetExamples(examples) - - # the matrix of results for this target: - tMat = GenVarTable(examples, nPossibleVals, [target])[0] - # counts of each result code: - counts = sum(tMat) - nzCounts = numpy.nonzero(counts)[0] - - if len(nzCounts) == 1: - # bottomed out because there is only one result code left - # with any counts (i.e. there's only one type of example - # left... this is GOOD!). - res = nzCounts[0] - tree.SetLabel(res) - tree.SetName(str(res)) - tree.SetTerminal(1) - elif len(attrs) == 0 or (maxDepth >= 0 and depth >= maxDepth): - # Bottomed out: no variables left or max depth hit - # We don't really know what to do here, so - # use the heuristic of picking the most prevalent - # result - v = numpy.argmax(counts) - tree.SetLabel(v) - tree.SetName('%d?' % v) - tree.SetTerminal(1) - else: - # find the variable which gives us the largest information gain - - gains = [entropy.InfoGain(x) for x in varTable] - best = attrs[numpy.argmax(gains)] - - # remove that variable from the lists of possible variables - nextAttrs = attrs[:] - if not kwargs.get('recycleVars', 0): - nextAttrs.remove(best) - - # set some info at this node - tree.SetName('Var: %d' % best) - tree.SetLabel(best) - # tree.SetExamples(examples) - tree.SetTerminal(0) - - # loop over possible values of the new variable and - # build a subtree for each one - for val in range(nPossibleVals[best]): - nextExamples = [] - for example in examples: - if example[best] == val: - nextExamples.append(example) - if len(nextExamples) == 0: - # this particular value of the variable has no examples, - # so there's not much sense in recursing. - # This can (and does) happen. - v = numpy.argmax(counts) - tree.AddChild('%d' % v, label=v, data=0.0, isTerminal=1) - else: - # recurse - tree.AddChildNode( - ID3(nextExamples, best, nextAttrs, nPossibleVals, depth + 1, maxDepth, **kwargs)) - return tree - - -def ID3Boot(examples, attrs, nPossibleVals, initialVar=None, depth=0, maxDepth=-1, **kwargs): - """ Bootstrapping code for the ID3 algorithm - - see ID3 for descriptions of the arguments - - If _initialVar_ is not set, the algorithm will automatically - choose the first variable in the tree (the standard greedy - approach). Otherwise, _initialVar_ will be used as the first - split. - - """ - totEntropy = CalcTotalEntropy(examples, nPossibleVals) - varTable = GenVarTable(examples, nPossibleVals, attrs) - - tree = DecTree.DecTreeNode(None, 'node') - # tree.SetExamples(examples) - tree._nResultCodes = nPossibleVals[-1] - - # you've got to love any language which will let you - # do this much work in a single line :-) - if initialVar is None: - best = attrs[numpy.argmax([entropy.InfoGain(x) for x in varTable])] - else: - best = initialVar - - tree.SetName('Var: %d' % best) - tree.SetData(totEntropy) - tree.SetLabel(best) - tree.SetTerminal(0) - nextAttrs = list(attrs) - if not kwargs.get('recycleVars', 0): - nextAttrs.remove(best) - - for val in range(nPossibleVals[best]): - nextExamples = [] - for example in examples: - if example[best] == val: - nextExamples.append(example) - - tree.AddChildNode(ID3(nextExamples, best, nextAttrs, nPossibleVals, depth, maxDepth, **kwargs)) - return tree diff --git a/rdkit/ML/DecTree/PruneTree.py b/rdkit/ML/DecTree/PruneTree.py deleted file mode 100755 index bb21d1f4b89..00000000000 --- a/rdkit/ML/DecTree/PruneTree.py +++ /dev/null @@ -1,306 +0,0 @@ -# -# Copyright (C) 2000-2008 greg Landrum and Rational Discovery LLC -# -""" Contains functionality for doing tree pruning - -""" - -import copy - -import numpy - -from rdkit.ML.DecTree import CrossValidate, DecTree - -_verbose = 0 - - -def MaxCount(examples): - """ given a set of examples, returns the most common result code - - **Arguments** - - examples: a list of examples to be counted - - **Returns** - - the most common result code - - """ - resList = [x[-1] for x in examples] - maxVal = max(resList) - counts = [None] * (maxVal + 1) - for i in range(maxVal + 1): - counts[i] = sum([x == i for x in resList]) - - return numpy.argmax(counts) - - -def _GetLocalError(node): - nWrong = 0 - for example in node.GetExamples(): - pred = node.ClassifyExample(example, appendExamples=0) - if pred != example[-1]: - nWrong += 1 - # if _verbose: print('------------------>MISS:',example,pred) - return nWrong - - -def _Pruner(node, level=0): - """Recursively finds and removes the nodes whose removals improve classification - - **Arguments** - - - node: the tree to be pruned. The pruning data should already be contained - within node (i.e. node.GetExamples() should return the pruning data) - - - level: (optional) the level of recursion, used only in _verbose printing - - - **Returns** - - the pruned version of node - - - **Notes** - - - This uses a greedy algorithm which basically does a DFS traversal of the tree, - removing nodes whenever possible. - - - If removing a node does not affect the accuracy, it *will be* removed. We - favor smaller trees. - - """ - if _verbose: - print(' ' * level, '<%d> ' % level, '>>> Pruner') - children = node.GetChildren()[:] - - bestTree = copy.deepcopy(node) - bestErr = 1e6 - # - # Loop over the children of this node, removing them when doing so - # either improves the local error or leaves it unchanged (we're - # introducing a bias for simpler trees). - # - for i in range(len(children)): - child = children[i] - examples = child.GetExamples() - if _verbose: - print(' ' * level, '<%d> ' % level, ' Child:', i, child.GetLabel()) - bestTree.Print() - print() - if len(examples): - if _verbose: - print(' ' * level, '<%d> ' % level, ' Examples', len(examples)) - if child.GetTerminal(): - if _verbose: - print(' ' * level, '<%d> ' % level, ' Terminal') - continue - - if _verbose: - print(' ' * level, '<%d> ' % level, ' Nonterminal') - - workTree = copy.deepcopy(bestTree) - # - # First recurse on the child (try removing things below it) - # - newNode = _Pruner(child, level=level + 1) - workTree.ReplaceChildIndex(i, newNode) - tempErr = _GetLocalError(workTree) - if tempErr <= bestErr: - bestErr = tempErr - bestTree = copy.deepcopy(workTree) - if _verbose: - print(' ' * level, '<%d> ' % level, '>->->->->->') - print(' ' * level, '<%d> ' % level, 'replacing:', i, child.GetLabel()) - child.Print() - print(' ' * level, '<%d> ' % level, 'with:') - newNode.Print() - print(' ' * level, '<%d> ' % level, '<-<-<-<-<-<') - else: - workTree.ReplaceChildIndex(i, child) - # - # Now try replacing the child entirely - # - bestGuess = MaxCount(child.GetExamples()) - newNode = DecTree.DecTreeNode(workTree, 'L:%d' % (bestGuess), label=bestGuess, isTerminal=1) - newNode.SetExamples(child.GetExamples()) - workTree.ReplaceChildIndex(i, newNode) - if _verbose: - print(' ' * level, '<%d> ' % level, 'ATTEMPT:') - workTree.Print() - newErr = _GetLocalError(workTree) - if _verbose: - print(' ' * level, '<%d> ' % level, '---> ', newErr, bestErr) - if newErr <= bestErr: - bestErr = newErr - bestTree = copy.deepcopy(workTree) - if _verbose: - print(' ' * level, '<%d> ' % level, 'PRUNING:') - workTree.Print() - else: - if _verbose: - print(' ' * level, '<%d> ' % level, 'FAIL') - # whoops... put the child back in: - workTree.ReplaceChildIndex(i, child) - else: - if _verbose: - print(' ' * level, '<%d> ' % level, ' No Examples', len(examples)) - # - # FIX: we need to figure out what to do here (nodes that contain - # no examples in the testing set). I can concoct arguments for - # leaving them in and for removing them. At the moment they are - # left intact. - # - pass - - if _verbose: - print(' ' * level, '<%d> ' % level, '<<< out') - return bestTree - - -def PruneTree(tree, trainExamples, testExamples, minimizeTestErrorOnly=1): - """ implements a reduced-error pruning of decision trees - - This algorithm is described on page 69 of Mitchell's book. - - Pruning can be done using just the set of testExamples (the validation set) - or both the testExamples and the trainExamples by setting minimizeTestErrorOnly - to 0. - - **Arguments** - - - tree: the initial tree to be pruned - - - trainExamples: the examples used to train the tree - - - testExamples: the examples held out for testing the tree - - - minimizeTestErrorOnly: if this toggle is zero, all examples (i.e. - _trainExamples_ + _testExamples_ will be used to evaluate the error. - - **Returns** - - a 2-tuple containing: - - 1) the best tree - - 2) the best error (the one which corresponds to that tree) - - """ - if minimizeTestErrorOnly: - testSet = testExamples - else: - testSet = trainExamples + testExamples - - # remove any stored examples the tree may have - tree.ClearExamples() - - # - # screen the test data through the tree so that we end up with the - # appropriate points stored at each node of the tree. Results are ignored - # - totErr, badEx = CrossValidate.CrossValidate(tree, testSet, appendExamples=1) - - # - # Prune - # - newTree = _Pruner(tree) - - # - # And recalculate the errors - # - totErr, badEx = CrossValidate.CrossValidate(newTree, testSet) - newTree.SetBadExamples(badEx) - - return newTree, totErr - - -# ------- -# testing code -# ------- -def _testRandom(): - from rdkit.ML.DecTree import randomtest - - # examples, attrs, nPossibleVals = randomtest.GenRandomExamples(nVars=20, randScale=0.25, - # nExamples=200) - examples, attrs, nPossibleVals = randomtest.GenRandomExamples(nVars=10, randScale=0.5, - nExamples=200) - tree, frac = CrossValidate.CrossValidationDriver(examples, attrs, nPossibleVals) - tree.Print() - tree.Pickle('orig.pkl') - print('original error is:', frac) - - print('----Pruning') - newTree, frac2 = PruneTree(tree, tree.GetTrainingExamples(), tree.GetTestExamples()) - newTree.Print() - print('pruned error is:', frac2) - newTree.Pickle('prune.pkl') - - -def _testSpecific(): - from rdkit.ML.DecTree import ID3 - oPts = [ - [0, 0, 1, 0], - [0, 1, 1, 1], - [1, 0, 1, 1], - [1, 1, 0, 0], - [1, 1, 1, 1], - ] - tPts = oPts + [[0, 1, 1, 0], [0, 1, 1, 0]] - - tree = ID3.ID3Boot(oPts, attrs=range(3), nPossibleVals=[2] * 4) - tree.Print() - err, _ = CrossValidate.CrossValidate(tree, oPts) - print('original error:', err) - - err, _ = CrossValidate.CrossValidate(tree, tPts) - print('original holdout error:', err) - newTree, frac2 = PruneTree(tree, oPts, tPts) - newTree.Print() - print('best error of pruned tree:', frac2) - err, badEx = CrossValidate.CrossValidate(newTree, tPts) - print('pruned holdout error is:', err) - print(badEx) - - -# print(len(tree), len(newTree)) - - -def _testChain(): - from rdkit.ML.DecTree import ID3 - oPts = [ - [1, 0, 0, 0, 1], - [1, 0, 0, 0, 1], - [1, 0, 0, 0, 1], - [1, 0, 0, 0, 1], - [1, 0, 0, 0, 1], - [1, 0, 0, 0, 1], - [1, 0, 0, 0, 1], - [0, 0, 1, 1, 0], - [0, 0, 1, 1, 0], - [0, 0, 1, 1, 1], - [0, 1, 0, 1, 0], - [0, 1, 0, 1, 0], - [0, 1, 0, 0, 1], - ] - tPts = oPts - - tree = ID3.ID3Boot(oPts, attrs=range(len(oPts[0]) - 1), nPossibleVals=[2] * len(oPts[0])) - tree.Print() - err, _ = CrossValidate.CrossValidate(tree, oPts) - print('original error:', err) - - err, _ = CrossValidate.CrossValidate(tree, tPts) - print('original holdout error:', err) - newTree, frac2 = PruneTree(tree, oPts, tPts) - newTree.Print() - print('best error of pruned tree:', frac2) - err, badEx = CrossValidate.CrossValidate(newTree, tPts) - print('pruned holdout error is:', err) - print(badEx) - - -if __name__ == '__main__': # pragma: nocover - _verbose = 1 - # _testRandom() - _testChain() diff --git a/rdkit/ML/DecTree/QuantTree.py b/rdkit/ML/DecTree/QuantTree.py deleted file mode 100644 index 20a1010420e..00000000000 --- a/rdkit/ML/DecTree/QuantTree.py +++ /dev/null @@ -1,98 +0,0 @@ -# $Id$ -# -# Copyright (C) 2001, 2003 greg Landrum and Rational Discovery LLC -# All Rights Reserved -# -""" Defines the class _QuantTreeNode_, used to represent decision trees with automatic - quantization bounds - - _QuantTreeNode_ is derived from _DecTree.DecTreeNode_ - -""" -from rdkit.ML.DecTree import DecTree, Tree - - -class QuantTreeNode(DecTree.DecTreeNode): - """ - - """ - - def __init__(self, *args, **kwargs): - DecTree.DecTreeNode.__init__(self, *args, **kwargs) - self.qBounds = [] - self.nBounds = 0 - - def ClassifyExample(self, example, appendExamples=0): - """ Recursively classify an example by running it through the tree - - **Arguments** - - - example: the example to be classified - - - appendExamples: if this is nonzero then this node (and all children) - will store the example - - **Returns** - - the classification of _example_ - - **NOTE:** - In the interest of speed, I don't use accessor functions - here. So if you subclass DecTreeNode for your own trees, you'll - have to either include ClassifyExample or avoid changing the names - of the instance variables this needs. - - """ - if appendExamples: - self.examples.append(example) - if self.terminalNode: - return self.label - else: - val = example[self.label] - if not hasattr(self, 'nBounds'): - self.nBounds = len(self.qBounds) - if self.nBounds: - for i, bound in enumerate(self.qBounds): - if val < bound: - val = i - break - else: - val = i + 1 - else: - val = int(val) - return self.children[val].ClassifyExample(example, appendExamples=appendExamples) - - def SetQuantBounds(self, qBounds): - self.qBounds = qBounds[:] - self.nBounds = len(self.qBounds) - - def GetQuantBounds(self): - return self.qBounds - - def __cmp__(self, other): - return (self < other) * -1 or (other < self) * 1 - - def __lt__(self, other): - if str(type(self)) < str(type(other)): - return True - if self.qBounds < other.qBounds: - return True - if Tree.TreeNode.__lt__(self, other): - return True - return False - - def __eq__(self, other): - return not self < other and not other < self - - def __str__(self): - """ returns a string representation of the tree - - **Note** - - this works recursively - - """ - here = '%s%s %s\n' % (' ' * self.level, self.name, str(self.qBounds)) - for child in self.children: - here = here + str(child) - return here diff --git a/rdkit/ML/DecTree/SigTree.py b/rdkit/ML/DecTree/SigTree.py deleted file mode 100644 index c9a4173422e..00000000000 --- a/rdkit/ML/DecTree/SigTree.py +++ /dev/null @@ -1,64 +0,0 @@ -# -# Copyright (C) 2003-2005 Rational Discovery LLC -# All Rights Reserved -# -""" Defines the class SigTreeNode, used to represent trees that - use signatures (bit vectors) to represent data. As inputs (examples), - SigTreeNode's expect 3-sequences: (label,sig,act) - - _SigTreeNode_ is derived from _DecTree.DecTreeNode_ - -""" -import copy - -from rdkit.DataStructs.VectCollection import VectCollection -from rdkit.ML.DecTree import DecTree - - -class SigTreeNode(DecTree.DecTreeNode): - """ - - """ - - def NameModel(self, *args, **kwargs): - pass - - def ClassifyExample(self, example, appendExamples=0): - """ Recursively classify an example by running it through the tree - - **Arguments** - - - example: the example to be classified, a sequence at least - 2 long: - ( id, sig ) - where sig is a BitVector (or something supporting __getitem__) - additional fields will be ignored. - - - appendExamples: if this is nonzero then this node (and all children) - will store the example - - **Returns** - - the classification of _example_ - - """ - if appendExamples: - self.examples.append(example) - if self.terminalNode: - return self.label - else: - sig = example[1] - val = sig[self.label] - # print 'val:',val - if val and isinstance(sig, VectCollection): - # we need to copy and modify the example: - sig = copy.copy(sig) - sig.DetachVectsNotMatchingBit(self.label) - ex = [example[0], sig] - if len(example) > 2: - ex.extend(example[2:]) - example = ex - return self.children[val].ClassifyExample(example, appendExamples=appendExamples) - - def __init__(self, *args, **kwargs): - DecTree.DecTreeNode.__init__(self, *args, **kwargs) diff --git a/rdkit/ML/DecTree/Tree.py b/rdkit/ML/DecTree/Tree.py deleted file mode 100755 index 9b33dee52a1..00000000000 --- a/rdkit/ML/DecTree/Tree.py +++ /dev/null @@ -1,339 +0,0 @@ -# -# Copyright (C) 2000-2008 greg Landrum and Rational Discovery LLC -# -""" Implements a class used to represent N-ary trees - -""" - -import pickle - - -# FIX: the TreeNode class has not been updated to new-style classes -# (RD Issue380) because that would break all of our legacy pickled -# data. Until a solution is found for this breakage, an update is -# impossible. -class TreeNode: - """ This is your bog standard Tree class. - - the root of the tree is just a TreeNode like all other members. - """ - - def __init__(self, parent, name, label=None, data=None, level=0, isTerminal=0): - """ constructor - - **Arguments** - - - parent: the parent of this node in the tree - - - name: the name of the node - - - label: the node's label (should be an integer) - - - data: an optional data field - - - level: an integer indicating the level of this node in the hierarchy - (used for printing) - - - isTerminal: flags a node as being terminal. This is useful for those - times when it's useful to know such things. - - """ - self.children = [] - self.parent = parent - self.name = name - self.data = data - self.terminalNode = isTerminal - self.label = label - self.level = level - self.examples = [] - - def NameTree(self, varNames): - """ Set the names of each node in the tree from a list of variable names. - - **Arguments** - - - varNames: a list of names to be assigned - - **Notes** - - 1) this works its magic by recursively traversing all children - - 2) The assumption is made here that the varNames list can be indexed - by the labels of tree nodes - - """ - if self.GetTerminal(): - return - else: - for child in self.GetChildren(): - child.NameTree(varNames) - self.SetName(varNames[self.GetLabel()]) - - NameModel = NameTree - - def AddChildNode(self, node): - """ Adds a TreeNode to the local list of children - - **Arguments** - - - node: the node to be added - - **Note** - - the level of the node (used in printing) is set as well - - """ - node.SetLevel(self.level + 1) - self.children.append(node) - - def AddChild(self, name, label=None, data=None, isTerminal=0): - """ Creates a new TreeNode and adds a child to the tree - - **Arguments** - - - name: the name of the new node - - - label: the label of the new node (should be an integer) - - - data: the data to be stored in the new node - - - isTerminal: a toggle to indicate whether or not the new node is - a terminal (leaf) node. - - **Returns* - - the _TreeNode_ which is constructed - - """ - child = TreeNode(self, name, label, data, level=self.level + 1, isTerminal=isTerminal) - self.children.append(child) - return child - - def PruneChild(self, child): - """ Removes the child node - - **Arguments** - - - child: a TreeNode - - """ - self.children.remove(child) - - def ReplaceChildIndex(self, index, newChild): - """ Replaces a given child with a new one - - **Arguments** - - - index: an integer - - - child: a TreeNode - - """ - self.children[index] = newChild - - def GetChildren(self): - """ Returns a python list of the children of this node - - """ - return self.children - - def Destroy(self): - """ Destroys this node and all of its children - - """ - for child in self.children: - child.Destroy() - self.children = [] - # clean up circular references - self.parent = None - - def GetName(self): - """ Returns the name of this node - - """ - return self.name - - def SetName(self, name): - """ Sets the name of this node - - """ - self.name = name - - def GetData(self): - """ Returns the data stored at this node - - """ - return self.data - - def SetData(self, data): - """ Sets the data stored at this node - - """ - self.data = data - - def GetTerminal(self): - """ Returns whether or not this node is terminal - - """ - return self.terminalNode - - def SetTerminal(self, isTerminal): - """ Sets whether or not this node is terminal - - """ - self.terminalNode = isTerminal - - def GetLabel(self): - """ Returns the label of this node - - """ - return self.label - - def SetLabel(self, label): - """ Sets the label of this node (should be an integer) - - """ - self.label = label - - def GetLevel(self): - """ Returns the level of this node - - """ - return self.level - - def SetLevel(self, level): - """ Sets the level of this node - - """ - self.level = level - - def GetParent(self): - """ Returns the parent of this node - - """ - return self.parent - - def SetParent(self, parent): - """ Sets the parent of this node - - """ - self.parent = parent - - def Print(self, level=0, showData=0): - """ Pretty prints the tree - - **Arguments** - - - level: sets the number of spaces to be added at the beginning of the output - - - showData: if this is nonzero, the node's _data_ value will be printed as well - - **Note** - - this works recursively - - """ - if showData: - print('%s%s: %s' % (' ' * level, self.name, str(self.data))) - else: - print('%s%s' % (' ' * level, self.name)) - - for child in self.children: - child.Print(level + 1, showData=showData) - - def Pickle(self, fileName='foo.pkl'): - """ Pickles the tree and writes it to disk - - """ - with open(fileName, 'wb+') as pFile: - pickle.dump(self, pFile) - - def __str__(self): - """ returns a string representation of the tree - - **Note** - - this works recursively - - """ - here = '%s%s\n' % (' ' * self.level, self.name) - for child in self.children: - here = here + str(child) - return here - - def __cmp__(self, other): - """ allows tree1 == tree2 - - **Note** - - This works recursively - """ - return (self < other) * -1 or (other < self) * 1 - - def __lt__(self, other): - """ allows tree1 < tree2 - - **Note** - - This works recursively - """ - try: - nChildren = len(self.children) - oChildren = len(other.children) - if str(type(self)) < str(type(other)): - return True - if self.name < other.name: - return True - if self.label is not None: - if other.label is not None: - if self.label < other.label: - return True - else: - return False - elif other.label is not None: - return True - if nChildren < oChildren: - return True - if nChildren > oChildren: - return False - for i in range(nChildren): - if self.children[i] < other.children[i]: - return True - except AttributeError: - return True - return False - - def __eq__(self, other): - return not self < other and not other < self - - -def _exampleCode(): - tree = TreeNode(None, 'root') - for i in range(3): - tree.AddChild('child %d' % i) - print(tree) - tree.GetChildren()[1].AddChild('grandchild') - tree.GetChildren()[1].AddChild('grandchild2') - tree.GetChildren()[1].AddChild('grandchild3') - print(tree) - tree.Pickle('save.pkl') - print('prune') - tree.PruneChild(tree.GetChildren()[1]) - print('done') - print(tree) - - import copy - tree2 = copy.deepcopy(tree) - print('tree==tree2', tree == tree2) - - foo = [tree] - print('tree in [tree]:', tree in foo, foo.index(tree)) - print('tree2 in [tree]:', tree2 in foo, foo.index(tree2)) - - tree2.GetChildren()[1].AddChild('grandchild4') - print('tree==tree2', tree == tree2) - tree.Destroy() - - -if __name__ == '__main__': # pragma: nocover - _exampleCode() diff --git a/rdkit/ML/DecTree/TreeUtils.py b/rdkit/ML/DecTree/TreeUtils.py deleted file mode 100644 index 8cd9e3b6647..00000000000 --- a/rdkit/ML/DecTree/TreeUtils.py +++ /dev/null @@ -1,28 +0,0 @@ -# -# Copyright (C) 2001-2004 greg Landrum and Rational Discovery LLC -# All Rights Reserved -# -""" Utilities for working with trees - -""" - - -def CollectLabelLevels(tree, levels, level=0, maxDepth=1e8): - if level < maxDepth: - if not tree.GetTerminal(): - l = tree.GetLabel() - currLevel = levels.get(l, 1e8) - if level < currLevel: - levels[l] = level - for child in tree.GetChildren(): - CollectLabelLevels(child, levels, level + 1, maxDepth) - return levels - - -def CollectDescriptorNames(tree, names, level=0, maxDepth=1e8): - if level < maxDepth: - if not tree.GetTerminal(): - names[tree.GetLabel()] = tree.GetName() - for child in tree.GetChildren(): - CollectDescriptorNames(child, names, level + 1, maxDepth) - return names diff --git a/rdkit/ML/DecTree/TreeVis.py b/rdkit/ML/DecTree/TreeVis.py deleted file mode 100755 index 90f808d096f..00000000000 --- a/rdkit/ML/DecTree/TreeVis.py +++ /dev/null @@ -1,229 +0,0 @@ -# $Id$ -# -# Copyright (C) 2002,2003 Greg Landrum and Rational Discovery LLC -# All Rights Reserved -# -""" functionality for drawing trees on sping canvases - -""" -import math - -from rdkit.sping import pid as piddle - - -class VisOpts(object): - circRad = 10 - minCircRad = 4 - maxCircRad = 16 - circColor = piddle.Color(0.6, 0.6, 0.9) - terminalEmptyColor = piddle.Color(.8, .8, .2) - terminalOnColor = piddle.Color(0.8, 0.8, 0.8) - terminalOffColor = piddle.Color(0.2, 0.2, 0.2) - outlineColor = piddle.transparent - lineColor = piddle.Color(0, 0, 0) - lineWidth = 2 - horizOffset = 10 - vertOffset = 50 - labelFont = piddle.Font(face='helvetica', size=10) - highlightColor = piddle.Color(1., 1., .4) - highlightWidth = 2 - - -visOpts = VisOpts() - - -def CalcTreeNodeSizes(node): - """Recursively calculate the total number of nodes under us. - - results are set in node.totNChildren for this node and - everything underneath it. - """ - children = node.GetChildren() - if len(children) > 0: - nHere = 0 - nBelow = 0 - for child in children: - CalcTreeNodeSizes(child) - nHere = nHere + child.totNChildren - if child.nLevelsBelow > nBelow: - nBelow = child.nLevelsBelow - else: - nBelow = 0 - nHere = 1 - - node.nExamples = len(node.GetExamples()) - node.totNChildren = nHere - node.nLevelsBelow = nBelow + 1 - - -def _ExampleCounter(node, min, max): - if node.GetTerminal(): - cnt = node.nExamples - if cnt < min: - min = cnt - if cnt > max: - max = cnt - else: - for child in node.GetChildren(): - provMin, provMax = _ExampleCounter(child, min, max) - if provMin < min: - min = provMin - if provMax > max: - max = provMax - return min, max - - -def _ApplyNodeScales(node, min, max): - if node.GetTerminal(): - if max != min: - loc = float(node.nExamples - min) / (max - min) - else: - loc = .5 - node._scaleLoc = loc - else: - for child in node.GetChildren(): - _ApplyNodeScales(child, min, max) - - -def SetNodeScales(node): - min, max = 1e8, -1e8 - min, max = _ExampleCounter(node, min, max) - node._scales = min, max - _ApplyNodeScales(node, min, max) - - -def DrawTreeNode(node, loc, canvas, nRes=2, scaleLeaves=False, showPurity=False): - """Recursively displays the given tree node and all its children on the canvas - """ - try: - nChildren = node.totNChildren - except AttributeError: - nChildren = None - if nChildren is None: - CalcTreeNodeSizes(node) - - if not scaleLeaves or not node.GetTerminal(): - rad = visOpts.circRad - else: - scaleLoc = getattr(node, "_scaleLoc", 0.5) - - rad = visOpts.minCircRad + node._scaleLoc * (visOpts.maxCircRad - visOpts.minCircRad) - - x1 = loc[0] - rad - y1 = loc[1] - rad - x2 = loc[0] + rad - y2 = loc[1] + rad - - if showPurity and node.GetTerminal(): - examples = node.GetExamples() - nEx = len(examples) - if nEx: - tgtVal = int(node.GetLabel()) - purity = 0.0 - for ex in examples: - if int(ex[-1]) == tgtVal: - purity += 1. / len(examples) - else: - purity = 1.0 - - deg = purity * math.pi - xFact = rad * math.sin(deg) - yFact = rad * math.cos(deg) - pureX = loc[0] + xFact - pureY = loc[1] + yFact - - children = node.GetChildren() - # just move down one level - childY = loc[1] + visOpts.vertOffset - # this is the left-hand side of the leftmost span - childX = loc[0] - ((visOpts.horizOffset + visOpts.circRad) * node.totNChildren) / 2 - for i in range(len(children)): - # center on this child's space - child = children[i] - halfWidth = ((visOpts.horizOffset + visOpts.circRad) * child.totNChildren) / 2 - - childX = childX + halfWidth - childLoc = [childX, childY] - canvas.drawLine(loc[0], loc[1], childLoc[0], childLoc[1], visOpts.lineColor, visOpts.lineWidth) - DrawTreeNode(child, childLoc, canvas, nRes=nRes, scaleLeaves=scaleLeaves, showPurity=showPurity) - - # and move over to the leftmost point of the next child - childX = childX + halfWidth - - if node.GetTerminal(): - lab = node.GetLabel() - cFac = float(lab) / float(nRes - 1) - if hasattr(node, 'GetExamples') and node.GetExamples(): - theColor = (1. - cFac) * visOpts.terminalOffColor + cFac * visOpts.terminalOnColor - outlColor = visOpts.outlineColor - else: - theColor = (1. - cFac) * visOpts.terminalOffColor + cFac * visOpts.terminalOnColor - outlColor = visOpts.terminalEmptyColor - canvas.drawEllipse(x1, y1, x2, y2, outlColor, visOpts.lineWidth, theColor) - if showPurity: - canvas.drawLine(loc[0], loc[1], pureX, pureY, piddle.Color(1, 1, 1), 2) - else: - theColor = visOpts.circColor - canvas.drawEllipse(x1, y1, x2, y2, visOpts.outlineColor, visOpts.lineWidth, theColor) - - # this does not need to be done every time - canvas.defaultFont = visOpts.labelFont - - labelStr = str(node.GetLabel()) - strLoc = (loc[0] - canvas.stringWidth(labelStr) / 2, loc[1] + canvas.fontHeight() / 4) - - canvas.drawString(labelStr, strLoc[0], strLoc[1]) - node._bBox = (x1, y1, x2, y2) - - -def CalcTreeWidth(tree): - try: - tree.totNChildren - except AttributeError: - CalcTreeNodeSizes(tree) - totWidth = tree.totNChildren * (visOpts.circRad + visOpts.horizOffset) - return totWidth - - -def DrawTree(tree, canvas, nRes=2, scaleLeaves=False, allowShrink=True, showPurity=False): - dims = canvas.size - loc = (dims[0] / 2, visOpts.vertOffset) - if scaleLeaves: - # try: - # l = tree._scales - # except AttributeError: - # l = None - # if l is None: - SetNodeScales(tree) - if allowShrink: - treeWid = CalcTreeWidth(tree) - while treeWid > dims[0]: - visOpts.circRad /= 2 - visOpts.horizOffset /= 2 - treeWid = CalcTreeWidth(tree) - DrawTreeNode(tree, loc, canvas, nRes, scaleLeaves=scaleLeaves, showPurity=showPurity) - - -def ResetTree(tree): - tree._scales = None - tree.totNChildren = None - for child in tree.GetChildren(): - ResetTree(child) - - -def _simpleTest(canv): - from .Tree import TreeNode as Node - root = Node(None, 'r', label='r') - c1 = root.AddChild('l1_1', label='l1_1') - c2 = root.AddChild('l1_2', isTerminal=1, label=1) - c3 = c1.AddChild('l2_1', isTerminal=1, label=0) - c4 = c1.AddChild('l2_2', isTerminal=1, label=1) - - DrawTreeNode(root, (150, visOpts.vertOffset), canv) - - -if __name__ == '__main__': - from rdkit.sping.PIL.pidPIL import PILCanvas - canv = PILCanvas(size=(300, 300), name='test.png') - _simpleTest(canv) - canv.save() diff --git a/rdkit/ML/DecTree/UnitTestID3.py b/rdkit/ML/DecTree/UnitTestID3.py deleted file mode 100644 index a0519c496b4..00000000000 --- a/rdkit/ML/DecTree/UnitTestID3.py +++ /dev/null @@ -1,148 +0,0 @@ -# -# Copyright (C) 2000 greg Landrum -# -""" unit tests for the ID3 implementation """ - -import io -import pickle -import unittest - -from rdkit import RDConfig -from rdkit.ML.Data import MLData -from rdkit.ML.DecTree import ID3 - - -class ID3TestCase(unittest.TestCase): - - def setUp(self): - self.basicTreeName = RDConfig.RDCodeDir + '/ML/DecTree/test_data/BasicTree.pkl' - self.multiTreeName = RDConfig.RDCodeDir + '/ML/DecTree/test_data/MultiTree.pkl' - - def _setupBasicTree(self): - examples = [[0, 0, 0, 0, 0], [0, 0, 0, 1, 0], [1, 0, 0, 0, 1], [2, 1, 0, 0, 1], [2, 2, 1, 0, 1], - [2, 2, 1, 1, 0], [1, 2, 1, 1, 1], [0, 1, 0, 0, 0], [0, 2, 1, 0, 1], [2, 1, 1, 0, 1], - [0, 1, 1, 1, 1], [1, 1, 0, 1, 1], [1, 0, 1, 0, 1], [2, 1, 0, 1, 0]] - - data = MLData.MLQuantDataSet(examples) - attrs = list(range(0, data.GetNVars())) - t1 = ID3.ID3Boot(data.GetAllData(), attrs, data.GetNPossibleVals()) - self.t1 = t1 - self.examples = examples - - def testBasicTree(self): - # " testing basic tree growth " - self._setupBasicTree() - with open(self.basicTreeName, 'r') as inTFile: - buf = inTFile.read().replace('\r\n', '\n').encode('utf-8') - inTFile.close() - with io.BytesIO(buf) as inFile: - t2 = pickle.load(inFile) - assert self.t1 == t2, 'Incorrect tree generated.' - - def _setupMultiTree(self): - examples = [[0, 1, 0, 0], [0, 0, 0, 1], [0, 0, 1, 2], [0, 1, 1, 2], [1, 0, 0, 2], [1, 0, 1, 2], - [1, 1, 0, 2], [1, 1, 1, 0]] - - data = MLData.MLQuantDataSet(examples) - attrs = list(range(0, data.GetNVars())) - t1 = ID3.ID3Boot(data.GetAllData(), attrs, data.GetNPossibleVals()) - self.t1 = t1 - self.examples = examples - - def testMultiTree(self): - # " testing multivalued tree growth " - self._setupMultiTree() - with open(self.multiTreeName, 'r') as inTFile: - buf = inTFile.read().replace('\r\n', '\n').encode('utf-8') - inTFile.close() - with io.BytesIO(buf) as inFile: - t2 = pickle.load(inFile) - assert self.t1 == t2, 'Incorrect tree generated.' - - def testClassify(self): - # " testing basic tree classification " - self._setupBasicTree() - self.assertEqual(self.t1.ClassifyExample(self.examples[0]), self.examples[0][-1], - 'BasicExample 0 misclassified') - self.assertEqual(self.t1.ClassifyExample(self.examples[1]), self.examples[1][-1], - 'BasicExample 1 misclassified') - self.assertEqual(self.t1.ClassifyExample(self.examples[6]), self.examples[6][-1], - 'BasicExample 6 misclassified') - self._setupMultiTree() - self.assertEqual(self.t1.ClassifyExample(self.examples[0]), self.examples[0][-1], - 'MultiExample 0 misclassified') - self.assertEqual(self.t1.ClassifyExample(self.examples[1]), self.examples[1][-1], - 'MultiExample 1 misclassified') - self.assertEqual(self.t1.ClassifyExample(self.examples[6]), self.examples[6][-1], - 'MultiExample 6 misclassified') - - # ------------- force python in the ID3 code - def _setupPyBasicTree(self): - from rdkit.ML.InfoTheory import entropy - ID3.entropy.InfoEntropy = entropy.PyInfoEntropy - ID3.entropy.InfoGain = entropy.PyInfoGain - - examples = [[0, 0, 0, 0, 0], [0, 0, 0, 1, 0], [1, 0, 0, 0, 1], [2, 1, 0, 0, 1], [2, 2, 1, 0, 1], - [2, 2, 1, 1, 0], [1, 2, 1, 1, 1], [0, 1, 0, 0, 0], [0, 2, 1, 0, 1], [2, 1, 1, 0, 1], - [0, 1, 1, 1, 1], [1, 1, 0, 1, 1], [1, 0, 1, 0, 1], [2, 1, 0, 1, 0]] - - data = MLData.MLQuantDataSet(examples) - attrs = list(range(0, data.GetNVars())) - t1 = ID3.ID3Boot(data.GetAllData(), attrs, data.GetNPossibleVals()) - self.t1 = t1 - self.examples = examples - - def testPyBasicTree(self): - # " testing basic tree growth (python entropy code) " - self._setupPyBasicTree() - with open(self.basicTreeName, 'r') as inTFile: - buf = inTFile.read().replace('\r\n', '\n').encode('utf-8') - inTFile.close() - with io.BytesIO(buf) as inFile: - t2 = pickle.load(inFile) - assert self.t1 == t2, 'Incorrect tree generated.' - - def _setupPyMultiTree(self): - from rdkit.ML.InfoTheory import entropy - ID3.entropy.InfoEntropy = entropy.PyInfoEntropy - ID3.entropy.InfoGain = entropy.PyInfoGain - - examples = [[0, 1, 0, 0], [0, 0, 0, 1], [0, 0, 1, 2], [0, 1, 1, 2], [1, 0, 0, 2], [1, 0, 1, 2], - [1, 1, 0, 2], [1, 1, 1, 0]] - - data = MLData.MLQuantDataSet(examples) - attrs = list(range(0, data.GetNVars())) - t1 = ID3.ID3Boot(data.GetAllData(), attrs, data.GetNPossibleVals()) - self.t1 = t1 - self.examples = examples - - def testPyMultiTree(self): - # " testing multivalued tree growth (python entropy code) " - self._setupPyMultiTree() - with open(self.multiTreeName, 'r') as inTFile: - buf = inTFile.read().replace('\r\n', '\n').encode('utf-8') - inTFile.close() - with io.BytesIO(buf) as inFile: - t2 = pickle.load(inFile) - assert self.t1 == t2, 'Incorrect tree generated.' - - def testPyClassify(self): - # " testing tree classification (python entropy code) " - self._setupPyBasicTree() - self.assertEqual(self.t1.ClassifyExample(self.examples[0]), self.examples[0][-1], - 'BasicExample 0 misclassified') - self.assertEqual(self.t1.ClassifyExample(self.examples[1]), self.examples[1][-1], - 'BasicExample 1 misclassified') - self.assertEqual(self.t1.ClassifyExample(self.examples[6]), self.examples[6][-1], - 'BasicExample 6 misclassified') - self._setupMultiTree() - self.assertEqual(self.t1.ClassifyExample(self.examples[0]), self.examples[0][-1], - 'MultiExample 0 misclassified') - self.assertEqual(self.t1.ClassifyExample(self.examples[1]), self.examples[1][-1], - 'MultiExample 1 misclassified') - self.assertEqual(self.t1.ClassifyExample(self.examples[6]), self.examples[6][-1], - 'MultiExample 6 misclassified') - - -if __name__ == '__main__': # pragma: nocover - unittest.main() diff --git a/rdkit/ML/DecTree/UnitTestPrune.py b/rdkit/ML/DecTree/UnitTestPrune.py deleted file mode 100755 index 5a9c0c8e852..00000000000 --- a/rdkit/ML/DecTree/UnitTestPrune.py +++ /dev/null @@ -1,76 +0,0 @@ -# -# Copyright (C) 2003 greg Landrum and Rational Discovery LLC -# -""" """ -import os -import unittest -from io import StringIO - -from rdkit.ML.DecTree import ID3, CrossValidate, PruneTree -from rdkit.TestRunner import redirect_stdout - - -def feq(a, b, tol=1e-4): - return abs(a - b) <= tol - - -class TreeTestCase(unittest.TestCase): - - def setUp(self): - pass - - def test1(self): - # " testing pruning with known results " - oPts = [ - [0, 0, 1, 0], - [0, 1, 1, 1], - [1, 0, 1, 1], - [1, 1, 0, 0], - [1, 1, 1, 1], - ] - tPts = oPts + [[0, 1, 1, 0], [0, 1, 1, 0]] - tree = ID3.ID3Boot(oPts, attrs=range(3), nPossibleVals=[2] * 4) - err, badEx = CrossValidate.CrossValidate(tree, oPts) - assert err == 0.0, 'bad initial error' - assert len(badEx) == 0, 'bad initial error' - - # prune with original data, shouldn't do anything - f = StringIO() - with redirect_stdout(f): - PruneTree._verbose = True - newTree, err = PruneTree.PruneTree(tree, [], oPts) - PruneTree._verbose = False - self.assertIn('Pruner', f.getvalue()) - assert newTree == tree, 'improper pruning' - - # prune with train data - newTree, err = PruneTree.PruneTree(tree, [], tPts) - assert newTree != tree, 'bad pruning' - assert feq(err, 0.14286), 'bad error result' - - def test_exampleCode(self): - f = StringIO() - with redirect_stdout(f): - try: - PruneTree._testRandom() - self.assertTrue(os.path.isfile('prune.pkl')) - finally: - if os.path.isfile('orig.pkl'): - os.remove('orig.pkl') - if os.path.isfile('prune.pkl'): - os.remove('prune.pkl') - self.assertIn('pruned error', f.getvalue()) - - f = StringIO() - with redirect_stdout(f): - PruneTree._testSpecific() - self.assertIn('pruned holdout error', f.getvalue()) - - f = StringIO() - with redirect_stdout(f): - PruneTree._testChain() - self.assertIn('pruned holdout error', f.getvalue()) - - -if __name__ == '__main__': # pragma: nocover - unittest.main() diff --git a/rdkit/ML/DecTree/UnitTestQuantTree.py b/rdkit/ML/DecTree/UnitTestQuantTree.py deleted file mode 100644 index 03615785d76..00000000000 --- a/rdkit/ML/DecTree/UnitTestQuantTree.py +++ /dev/null @@ -1,216 +0,0 @@ -# -# Copyright (C) 2001,2003 greg Landrum and Rational Discovery LLC -# -""" unit tests for the QuantTree implementation """ - -import io -import pickle -import unittest -from io import StringIO - -from rdkit import RDConfig -from rdkit.ML.DecTree import BuildQuantTree -from rdkit.ML.DecTree.QuantTree import QuantTreeNode -from rdkit.TestRunner import redirect_stdout - - -def cmp(t1, t2): - return (t1 < t2) * -1 or (t1 > t2) * 1 - - -class TestCase(unittest.TestCase): - - def setUp(self): - self.qTree1Name = RDConfig.RDCodeDir + '/ML/DecTree/test_data/QuantTree1.pkl' - self.qTree2Name = RDConfig.RDCodeDir + '/ML/DecTree/test_data/QuantTree2.pkl' - - def _setupTree1(self): - examples1 = [['p1', 0, 1, 0.1, 0], ['p2', 0, 0, 0.1, 1], ['p3', 0, 0, 1.1, 2], - ['p4', 0, 1, 1.1, 2], ['p5', 1, 0, 0.1, 2], ['p6', 1, 0, 1.1, 2], - ['p7', 1, 1, 0.1, 2], ['p8', 1, 1, 1.1, 0]] - attrs = list(range(1, len(examples1[0]) - 1)) - nPossibleVals = [0, 2, 2, 0, 3] - boundsPerVar = [0, 0, 0, 1, 0] - - self.t1 = BuildQuantTree.QuantTreeBoot(examples1, attrs, nPossibleVals, boundsPerVar) - self.examples1 = examples1 - - def _setupTree2(self): - examples1 = [['p1', 0.1, 1, 0.1, 0], ['p2', 0.1, 0, 0.1, 1], ['p3', 0.1, 0, 1.1, 2], - ['p4', 0.1, 1, 1.1, 2], ['p5', 1.1, 0, 0.1, 2], ['p6', 1.1, 0, 1.1, 2], - ['p7', 1.1, 1, 0.1, 2], ['p8', 1.1, 1, 1.1, 0]] - attrs = list(range(1, len(examples1[0]) - 1)) - nPossibleVals = [0, 0, 2, 0, 3] - boundsPerVar = [0, 1, 0, 1, 0] - - self.t2 = BuildQuantTree.QuantTreeBoot(examples1, attrs, nPossibleVals, boundsPerVar) - self.examples2 = examples1 - - def _setupTree1a(self): - examples1 = [['p1', 0, 1, 0.1, 4.0, 0], ['p2', 0, 0, 0.1, 4.1, 1], ['p3', 0, 0, 1.1, 4.2, 2], - ['p4', 0, 1, 1.1, 4.2, 2], ['p5', 1, 0, 0.1, 4.2, 2], ['p6', 1, 0, 1.1, 4.2, 2], - ['p7', 1, 1, 0.1, 4.2, 2], ['p8', 1, 1, 1.1, 4.0, 0]] - attrs = list(range(1, len(examples1[0]) - 1)) - nPossibleVals = [0, 2, 2, 0, 0, 3] - boundsPerVar = [0, 0, 0, 1, -1, 0] - - self.t1 = BuildQuantTree.QuantTreeBoot(examples1, attrs, nPossibleVals, boundsPerVar) - self.examples1 = examples1 - - def test0Cmp(self): - # " testing tree comparisons " - self._setupTree1() - self._setupTree2() - assert self.t1 == self.t1, 'self equals failed' - assert self.t2 == self.t2, 'self equals failed' - assert self.t1 != self.t2, 'not equals failed' - - def test1Tree(self): - # " testing tree1 " - self._setupTree1() - with open(self.qTree1Name, 'r') as inTFile: - buf = inTFile.read().replace('\r\n', '\n').encode('utf-8') - inTFile.close() - with io.BytesIO(buf) as inFile: - t2 = pickle.load(inFile) - assert self.t1 == t2, 'Incorrect tree generated. ' - - self.assertIn('Var: 2 []', str(self.t1)) - self.assertEqual(self.t1.GetQuantBounds(), []) - - def test2Tree(self): - # " testing tree2 " - self._setupTree2() - with open(self.qTree2Name, 'r') as inTFile: - buf = inTFile.read().replace('\r\n', '\n').encode('utf-8') - inTFile.close() - with io.BytesIO(buf) as inFile: - t2 = pickle.load(inFile) - assert self.t2 == t2, 'Incorrect tree generated.' - - def test3Classify(self): - # " testing classification " - self._setupTree1() - self._setupTree2() - for i in range(len(self.examples1)): - self.assertEqual(self.t1.ClassifyExample(self.examples1[i]), self.examples1[i][-1], - msg='examples1[%d] misclassified' % i) - for i in range(len(self.examples2)): - self.assertEqual(self.t2.ClassifyExample(self.examples2[i]), self.examples2[i][-1], - msg='examples2[%d] misclassified' % i) - - def test4UnusedVars(self): - # " testing unused variables " - self._setupTree1a() - with open(self.qTree1Name, 'r') as inTFile: - buf = inTFile.read().replace('\r\n', '\n').encode('utf-8') - inTFile.close() - with io.BytesIO(buf) as inFile: - t2 = pickle.load(inFile) - assert self.t1 == t2, 'Incorrect tree generated.' - for i in range(len(self.examples1)): - self.assertEqual(self.t1.ClassifyExample(self.examples1[i]), self.examples1[i][-1], - 'examples1[%d] misclassified' % i) - - def test5Bug29(self): - # """ a more extensive test of the cmp stuff using hand-built trees """ - import copy - - t1 = QuantTreeNode(None, 't1') - t1.SetQuantBounds([1.]) - c1 = QuantTreeNode(t1, 'c1') - c1.SetQuantBounds([2.]) - t1.AddChildNode(c1) - c2 = QuantTreeNode(t1, 'c2') - c2.SetQuantBounds([2.]) - t1.AddChildNode(c2) - c11 = QuantTreeNode(c1, 'c11') - c11.SetQuantBounds([3.]) - c1.AddChildNode(c11) - c12 = QuantTreeNode(c1, 'c12') - c12.SetQuantBounds([3.]) - c1.AddChildNode(c12) - assert not cmp(t1, copy.deepcopy(t1)), 'self equality failed' - - t2 = QuantTreeNode(None, 't1') - t2.SetQuantBounds([1.]) - c1 = QuantTreeNode(t2, 'c1') - c1.SetQuantBounds([2.]) - t2.AddChildNode(c1) - c2 = QuantTreeNode(t2, 'c2') - c2.SetQuantBounds([2.]) - t2.AddChildNode(c2) - c11 = QuantTreeNode(c1, 'c11') - c11.SetQuantBounds([3.]) - c1.AddChildNode(c11) - c12 = QuantTreeNode(c1, 'c12') - c12.SetQuantBounds([3.00003]) - c1.AddChildNode(c12) - assert cmp(t1, t2), 'inequality failed' - - def test6Bug29_2(self): - # """ a more extensive test of the cmp stuff using pickled trees""" - import os - with open(os.path.join(RDConfig.RDCodeDir, 'ML', 'DecTree', 'test_data', 'CmpTree1.pkl'), - 'r') as t1TFile: - buf = t1TFile.read().replace('\r\n', '\n').encode('utf-8') - t1TFile.close() - with io.BytesIO(buf) as t1File: - t1 = pickle.load(t1File) - with open(os.path.join(RDConfig.RDCodeDir, 'ML', 'DecTree', 'test_data', 'CmpTree2.pkl'), - 'r') as t2TFile: - buf = t2TFile.read().replace('\r\n', '\n').encode('utf-8') - t2TFile.close() - with io.BytesIO(buf) as t2File: - t2 = pickle.load(t2File) - assert cmp(t1, t2), 'equality failed' - - def test7Recycle(self): - # """ try recycling descriptors """ - examples1 = [ - [3, 0, 0], - [3, 1, 1], - [1, 0, 0], - [0, 0, 1], - [1, 1, 0], - ] - attrs = list(range(2)) - nPossibleVals = [2, 2, 2] - boundsPerVar = [1, 0, 0] - self.t1 = BuildQuantTree.QuantTreeBoot(examples1, attrs, nPossibleVals, boundsPerVar, - recycleVars=1) - assert self.t1.GetLabel() == 0, self.t1.GetLabel() - assert self.t1.GetChildren()[0].GetLabel() == 1 - assert self.t1.GetChildren()[1].GetLabel() == 1 - assert self.t1.GetChildren()[1].GetChildren()[0].GetLabel() == 0 - assert self.t1.GetChildren()[1].GetChildren()[1].GetLabel() == 0 - - def test8RandomForest(self): - # """ try random forests descriptors """ - import random - random.seed(23) - nAttrs = 100 - nPts = 10 - examples = [] - for _ in range(nPts): - descrs = [int(random.random() > 0.5) for _ in range(nAttrs)] - act = sum(descrs) > nAttrs / 2 - examples.append(descrs + [act]) - attrs = list(range(nAttrs)) - nPossibleVals = [2] * (nAttrs + 1) - boundsPerVar = [0] * nAttrs + [0] - self.t1 = BuildQuantTree.QuantTreeBoot(examples, attrs, nPossibleVals, boundsPerVar, maxDepth=1, - recycleVars=1, randomDescriptors=3) - self.assertEqual(self.t1.GetLabel(), 49) - self.assertEqual(self.t1.GetChildren()[0].GetLabel(), 3) - self.assertEqual(self.t1.GetChildren()[1].GetLabel(), 54) - - def test_exampleCode(self): - f = StringIO() - with redirect_stdout(f): - BuildQuantTree.TestTree() - self.assertIn('Var: 2', f.getvalue()) - - -if __name__ == '__main__': # pragma: nocover - unittest.main() diff --git a/rdkit/ML/DecTree/UnitTestSigTree.py b/rdkit/ML/DecTree/UnitTestSigTree.py deleted file mode 100644 index 95dab0f7f08..00000000000 --- a/rdkit/ML/DecTree/UnitTestSigTree.py +++ /dev/null @@ -1,174 +0,0 @@ -# $Id$ -# -# Copyright (C) 2005 greg Landrum and Rational Discovery LLC -# All Rights Reserved -# -import gzip -import os -import unittest -from io import StringIO - -from rdkit import RDConfig -from rdkit.DataStructs import ExplicitBitVect -from rdkit.DataStructs.VectCollection import VectCollection -from rdkit.ML import InfoTheory -from rdkit.ML.DecTree.BuildSigTree import BuildSigTree, _GenerateRandomEnsemble -from rdkit.ML.DecTree.SigTree import SigTreeNode -from rdkit.TestRunner import redirect_stdout - - -class TestCase(unittest.TestCase): - - def setUp(self): - t1 = SigTreeNode(None, 'root', 0) - - t2 = SigTreeNode(t1, 'nodeL1', 1) - t1.AddChildNode(t2) - t3 = SigTreeNode(t2, 'nodeLTerm0', 0, isTerminal=1) - t4 = SigTreeNode(t2, 'nodeLTerm1', 1, isTerminal=1) - t2.AddChildNode(t3) - t2.AddChildNode(t4) - - t2 = SigTreeNode(t1, 'nodeR1', 2) - t1.AddChildNode(t2) - t3 = SigTreeNode(t2, 'nodeRTerm0', 1, isTerminal=1) - t4 = SigTreeNode(t2, 'nodeRTerm1', 0, isTerminal=1) - t2.AddChildNode(t3) - t2.AddChildNode(t4) - self.tree = t1 - - def test1(self): - t1 = self.tree - bv = ExplicitBitVect(5) - - ex = ['nm', bv] - self.assertFalse(t1.ClassifyExample(ex)) - bv.SetBit(1) - self.assertTrue(t1.ClassifyExample(ex)) - - bv.SetBit(0) - self.assertTrue(t1.ClassifyExample(ex)) - - bv.SetBit(2) - self.assertFalse(t1.ClassifyExample(ex)) - - def test2(self): - t1 = self.tree - vc = VectCollection() - - bv = ExplicitBitVect(5) - bv.SetBitsFromList([0]) - vc.AddVect(1, bv) - - bv = ExplicitBitVect(5) - bv.SetBitsFromList([1, 2]) - vc.AddVect(2, bv) - - ex = ['nm', bv, 1] - self.assertTrue(t1.ClassifyExample(ex)) - - bv = ExplicitBitVect(5) - bv.SetBitsFromList([0, 2]) - vc.AddVect(1, bv) - ex = ['nm', bv, 1] - self.assertFalse(t1.ClassifyExample(ex)) - - def test3(self): - examples = [] - - bv = ExplicitBitVect(2) - vc = VectCollection() - vc.AddVect(1, bv) - examples.append(['a', vc, 1]) - - bv = ExplicitBitVect(2) - bv.SetBit(1) - vc = VectCollection() - vc.AddVect(1, bv) - examples.append(['c', vc, 0]) - - bv = ExplicitBitVect(2) - bv.SetBit(1) - vc = VectCollection() - vc.AddVect(1, bv) - examples.append(['c2', vc, 0]) - - bv = ExplicitBitVect(2) - bv.SetBit(0) - vc = VectCollection() - vc.AddVect(1, bv) - examples.append(['d', vc, 0]) - - bv = ExplicitBitVect(2) - bv.SetBit(0) - vc = VectCollection() - vc.AddVect(1, bv) - bv = ExplicitBitVect(2) - bv.SetBit(1) - vc.AddVect(2, bv) - examples.append(['d2', vc, 0]) - - bv = ExplicitBitVect(2) - bv.SetBit(0) - bv.SetBit(1) - vc = VectCollection() - vc.AddVect(1, bv) - examples.append(['d', vc, 1]) - - bv = ExplicitBitVect(2) - bv.SetBit(0) - bv.SetBit(1) - vc = VectCollection() - vc.AddVect(1, bv) - examples.append(['e', vc, 1]) - - f = StringIO() - with redirect_stdout(f): - t = BuildSigTree(examples, 2, metric=InfoTheory.InfoType.ENTROPY, maxDepth=2, verbose=True) - self.assertIn('Build', f.getvalue()) - - self.assertEqual(t.GetName(), 'Bit-0') - self.assertEqual(t.GetLabel(), 0) - c0 = t.GetChildren()[0] - self.assertEqual(c0.GetName(), 'Bit-1') - self.assertEqual(c0.GetLabel(), 1) - c1 = t.GetChildren()[1] - self.assertEqual(c1.GetName(), 'Bit-1') - self.assertEqual(c1.GetLabel(), 1) - - bv = ExplicitBitVect(2) - bv.SetBit(0) - vc = VectCollection() - vc.AddVect(1, bv) - bv = ExplicitBitVect(2) - bv.SetBit(1) - vc.AddVect(2, bv) - r = t.ClassifyExample(['t', vc, 0]) - self.assertEqual(r, 0) - - def test4(self): - import pickle - gz = gzip.open( - os.path.join(RDConfig.RDCodeDir, 'ML', 'DecTree', 'test_data', 'cdk2-few.pkl.gz'), 'rb') - examples = pickle.load(gz, encoding='Latin1') - t = BuildSigTree(examples, 2, maxDepth=3) - self.assertEqual(t.GetLabel(), 2181) - self.assertEqual(t.GetChildren()[0].GetLabel(), 2861) - self.assertEqual(t.GetChildren()[1].GetLabel(), 8182) - - def test_GenerateRandomEnsemble(self): - ensemble = _GenerateRandomEnsemble(2, 4) - self.assertEqual(len(ensemble), 2) - self.assertTrue(all(r < 4 for r in ensemble)) - - ensemble = _GenerateRandomEnsemble(4, 4) - self.assertEqual(len(ensemble), 4) - self.assertTrue(all(r < 4 for r in ensemble)) - - ensemble = _GenerateRandomEnsemble(4, 40) - self.assertEqual(len(ensemble), 4) - self.assertTrue(all(r < 40 for r in ensemble)) - - -if __name__ == '__main__': # pragma: nocover - unittest.main() diff --git a/rdkit/ML/DecTree/UnitTestTree.py b/rdkit/ML/DecTree/UnitTestTree.py deleted file mode 100755 index 3c88335e70b..00000000000 --- a/rdkit/ML/DecTree/UnitTestTree.py +++ /dev/null @@ -1,137 +0,0 @@ -# -# Copyright (C) 2000 greg Landrum -# -""" unit testing code for trees and decision trees (not learning/xvalidation) """ - -import copy -import os -import pickle -import unittest -from io import StringIO - -from rdkit import RDConfig -from rdkit.ML.DecTree import Tree -from rdkit.TestRunner import redirect_stdout - - -class TreeTestCase(unittest.TestCase): - - def setUp(self): - self.baseTree = Tree.TreeNode(None, 'root') - self.pickleFileName = RDConfig.RDCodeDir + '/ML/DecTree/test_data/treeunit.pkl' - - def test_Tree(self): - tree = Tree.TreeNode(None, 'root', label=0) - self.assertEqual(tree.GetLevel(), 0) - self.assertEqual(tree.GetName(), 'root') - self.assertEqual(tree.GetData(), None) - self.assertEqual(tree.GetTerminal(), False) - self.assertEqual(tree.GetLabel(), 0) - self.assertEqual(tree.GetParent(), None) - self.assertEqual(tree.GetChildren(), []) - - for i in range(3): - child = tree.AddChild('child {0}'.format(i), i + 1, data={'key': 'value'}) - self.assertEqual(child.GetLevel(), 1) - self.assertEqual(child.GetName(), 'child {0}'.format(i)) - self.assertEqual(child.GetData(), {'key': 'value'}) - self.assertEqual(child.GetLabel(), i + 1) - self.assertEqual(child.GetParent(), tree) - self.assertEqual(child.GetChildren(), []) - children = tree.GetChildren() - self.assertEqual(len(children), 3) - children[0].AddChild('terminal', 4, isTerminal=True) - - s = str(tree) - self.assertIn('root', s) - self.assertIn(' terminal', s) - self.assertIn(' child 2', s) - - tree.NameTree(['a', 'b', 'c', 'd', 'e']) - self.assertEqual(str(tree), 'a\n b\n terminal\n c\n d\n') - - tree.PruneChild(children[1]) - self.assertEqual(str(tree), 'a\n b\n terminal\n d\n') - - f = StringIO() - with redirect_stdout(f): - tree.Print(showData=True) - s = f.getvalue() - self.assertIn('value', s) - self.assertIn('None', s) - - f = StringIO() - with redirect_stdout(f): - tree.Print() - s = f.getvalue() - self.assertNotIn('value', s) - self.assertNotIn('None', s) - - tree.Destroy() - self.assertEqual(str(tree), 'a\n') - - def _readyTree(self): - tree = self.baseTree - tree.AddChild('child0') - tree.AddChild('child1') - - def test5Equals(self): - # " testing tree equals " - nTree = Tree.TreeNode(None, 'root') - self._readyTree() - tTree = self.baseTree - self.baseTree = nTree - self._readyTree() - assert tTree == self.baseTree, 'Equality test 1 failed. (bad Tree.__cmp__)' - assert self.baseTree == tTree, 'Equality test 2 failed. (bad Tree.__cmp__)' - tTree.AddChild('child2') - assert tTree != self.baseTree, 'Inequality test 1 failed. (bad Tree.__cmp__)' - assert self.baseTree != tTree, 'Inequality test 2 failed. (bad Tree.__cmp__)' - - self.assertTrue(tTree > self.baseTree, msg='Larger tree is greater') - self.assertEqual(tTree.__cmp__(self.baseTree), 1) - - def test6PickleEquals(self): - # " testing pickled tree equals " - self._readyTree() - pkl = pickle.dumps(self.baseTree) - oTree = pickle.loads(pkl) - - assert oTree == self.baseTree, 'Pickle inequality test failed' - self.assertEqual(oTree.__cmp__(self.baseTree), 0) - - self.baseTree.PruneChild(self.baseTree.GetChildren()[0]) - assert oTree != self.baseTree, 'Pickle inequality test failed (bad Tree.__cmp__)' - self.assertEqual(abs(oTree.__cmp__(self.baseTree)), 1) - - def test7Copy(self): - # " testing deepcopy on trees " - self._readyTree() - nTree = copy.deepcopy(self.baseTree) - assert nTree == self.baseTree, 'deepcopy failed' - - def test8In(self): - # " testing list membership " - self._readyTree() - nTree = copy.deepcopy(self.baseTree) - nTree2 = copy.deepcopy(self.baseTree) - nTree2.PruneChild(self.baseTree.GetChildren()[0]) - tList = [nTree2, nTree2, nTree] - assert self.baseTree in tList, 'list membership (tree in list) failed' - tList = [nTree2, nTree2] - assert self.baseTree not in tList, 'list membership (tree not in list) failed' - - def test_exampleCode(self): - try: - f = StringIO() - with redirect_stdout(f): - Tree._exampleCode() - self.assertTrue(os.path.isfile('save.pkl')) - self.assertIn('tree==tree2 False', f.getvalue(), 'Example didn' 't run to end') - finally: - if os.path.isfile('save.pkl'): - os.remove('save.pkl') - - -if __name__ == '__main__': # pragma: nocover - unittest.main() diff --git a/rdkit/ML/DecTree/UnitTestTreeUtils.py b/rdkit/ML/DecTree/UnitTestTreeUtils.py deleted file mode 100644 index 0df2d753264..00000000000 --- a/rdkit/ML/DecTree/UnitTestTreeUtils.py +++ /dev/null @@ -1,48 +0,0 @@ -# -# Copyright (C) 2000 greg Landrum -# -""" unit testing code for trees and decision trees (not learning/xvalidation) """ - -import unittest - -from rdkit.ML.DecTree import TreeUtils -from rdkit.ML.DecTree.DecTree import DecTreeNode as Node - - -class TestTreeUtils(unittest.TestCase): - - def test_TreeUtils(self): - # Tree is d1(d2,d3(d2,d4)) - t1 = Node(None, 'd1', 1) - t2 = Node(None, 'd2', 2) - t1.AddChildNode(t2) - t2 = Node(None, 'd3', 3) - t1.AddChildNode(t2) - t3 = Node(None, 'd4', 4) - t2.AddChildNode(t3) - t3 = Node(None, 'd2', 2) - t2.AddChildNode(t3) - - r = TreeUtils.CollectLabelLevels(t1, {}) - self.assertEqual(r, {1: 0, 2: 1, 3: 1, 4: 2}) - - # Only to depth 2 - r = TreeUtils.CollectLabelLevels(t1, {}, 0, 2) - self.assertEqual(r, {1: 0, 2: 1, 3: 1}) - - # Check that we can handle subtrees: - r = TreeUtils.CollectLabelLevels(t1, {}, 1, 2) - self.assertEqual(r, {1: 1}) - - names = TreeUtils.CollectDescriptorNames(t1, {}) - self.assertEqual(names, {1: 'd1', 2: 'd2', 3: 'd3', 4: 'd4'}) - - names = TreeUtils.CollectDescriptorNames(t1, {}, 0, 2) - self.assertEqual(names, {1: 'd1', 2: 'd2', 3: 'd3'}) - - names = TreeUtils.CollectDescriptorNames(t1, {}, 1, 2) - self.assertEqual(names, {1: 'd1'}) - - -if __name__ == '__main__': # pragma: nocover - unittest.main() diff --git a/rdkit/ML/DecTree/UnitTestXVal.py b/rdkit/ML/DecTree/UnitTestXVal.py deleted file mode 100644 index 802fb512ff6..00000000000 --- a/rdkit/ML/DecTree/UnitTestXVal.py +++ /dev/null @@ -1,76 +0,0 @@ -# -# Copyright (C) 2000 greg Landrum -# -""" unit testing code for cross validation """ - -import os -import pickle -import unittest -from io import BytesIO, StringIO - -from rdkit import RDConfig, RDRandom -from rdkit.ML.DecTree import CrossValidate, randomtest -from rdkit.TestRunner import redirect_stdout - - -class XValTestCase(unittest.TestCase): - - def setUp(self): - self.origTreeName = RDConfig.RDCodeDir + '/ML/DecTree/test_data/XValTree.pkl' - self.randomSeed = 23 - self.randomArraySeed = (23, 42) - - def testRun(self): - # " test that the CrossValidationDriver runs " - examples, attrs, nPossibleVals = randomtest.GenRandomExamples(nExamples=200) - f = StringIO() - with redirect_stdout(f): - tree, frac = CrossValidate.CrossValidationDriver(examples, attrs, nPossibleVals, silent=False) - self.assertGreater(frac, 0) - self.assertEqual('Var: 1', tree.GetName()) - self.assertIn('Validation error', f.getvalue()) - - CrossValidate.CrossValidationDriver(examples, attrs, nPossibleVals, lessGreedy=True, - calcTotalError=True, silent=True) - - def testResults(self): - # " test the results of CrossValidation " - RDRandom.seed(self.randomSeed) - examples, attrs, nPossibleVals = randomtest.GenRandomExamples(nExamples=200, - seed=self.randomArraySeed) - tree, frac = CrossValidate.CrossValidationDriver(examples, attrs, nPossibleVals, silent=1) - self.assertGreater(frac, 0) - - with open(self.origTreeName, 'r') as inTFile: - buf = inTFile.read().replace('\r\n', '\n').encode('utf-8') - inTFile.close() - inFile = BytesIO(buf) - oTree = pickle.load(inFile) - - assert oTree == tree, 'Random CrossValidation test failed' - - def testReplacementSelection(self): - # " use selection with replacement " - RDRandom.seed(self.randomSeed) - examples, attrs, nPossibleVals = randomtest.GenRandomExamples(nExamples=200, - seed=self.randomArraySeed) - tree, frac = CrossValidate.CrossValidationDriver(examples, attrs, nPossibleVals, silent=1, - replacementSelection=1) - self.assertTrue(tree) - self.assertAlmostEqual(frac, 0.01666, 4) - - def test_TestRun(self): - try: - f = StringIO() - with redirect_stdout(f): - CrossValidate.TestRun() - self.assertTrue(os.path.isfile('save.pkl')) - s = f.getvalue() - self.assertIn('t1 == t2 True', s) - finally: - if os.path.isfile('save.pkl'): - os.remove('save.pkl') - - -if __name__ == '__main__': # pragma: nocover - unittest.main() diff --git a/rdkit/ML/DecTree/__init__.py b/rdkit/ML/DecTree/__init__.py deleted file mode 100644 index 9aac1e2e72b..00000000000 --- a/rdkit/ML/DecTree/__init__.py +++ /dev/null @@ -1,11 +0,0 @@ -# copyright 2000, greg landrum -""" - -Here we're implementing the Decision Tree stuff found in Chapter 3 of -Tom Mitchell's Machine Learning Book. - -""" -from warnings import warn - -warn('This module is deprecated and will be removed in the 2024.03 release', DeprecationWarning, - stacklevel=2) diff --git a/rdkit/ML/DecTree/randomtest.py b/rdkit/ML/DecTree/randomtest.py deleted file mode 100755 index 6361647a9e9..00000000000 --- a/rdkit/ML/DecTree/randomtest.py +++ /dev/null @@ -1,37 +0,0 @@ -import random - -import numpy - -from rdkit.ML.DecTree import ID3 - - -def GenRandomExamples(nVars=10, randScale=0.3, bitProb=0.5, nExamples=500, seed=(0, 0), - addResults=1): - random.seed(seed[0]) - varWeights = numpy.array([random.random() for _ in range(nVars)]) * randScale - examples = [None] * nExamples - - for i in range(nExamples): - varVals = [random.random() > bitProb for _ in range(nVars)] - temp = numpy.array(varVals) * varWeights - res = sum(temp) - if addResults: - varVals.append(res >= 1.) - examples[i] = varVals - - nPossibleVals = [2] * (nExamples + 1) - attrs = list(range(nVars)) - - return (examples, attrs, nPossibleVals) - - -if __name__ == '__main__': # pragma: nocover - import pickle - examples, attrs, nPossibleVals = GenRandomExamples() - outF = open('random.dat.pkl', 'wb+') - pickle.dump(examples, outF) - pickle.dump(attrs, outF) - pickle.dump(nPossibleVals, outF) - - tree = ID3.ID3Boot(examples, attrs, nPossibleVals) - tree.Pickle('save.pkl') diff --git a/rdkit/ML/DecTree/test_data/BasicTree.pkl b/rdkit/ML/DecTree/test_data/BasicTree.pkl deleted file mode 100644 index 5e55d00fab7..00000000000 --- a/rdkit/ML/DecTree/test_data/BasicTree.pkl +++ /dev/null @@ -1,287 +0,0 @@ -(irdkit.ML.DecTree.DecTree -DecTreeNode -p1 -(dp2 -S'children' -p3 -(lp4 -(irdkit.ML.DecTree.DecTree -DecTreeNode -(dp5 -g3 -(lp6 -(irdkit.ML.DecTree.DecTree -DecTreeNode -(dp7 -g3 -(lsS'data' -p8 -F0 -sS'trainingExamples' -p9 -(lsS'examples' -p10 -(lp11 -(lp12 -I0 -aI0 -aI0 -aI0 -aI0 -aa(lp13 -I0 -aI0 -aI0 -aI1 -aI0 -aa(lp14 -I0 -aI1 -aI0 -aI0 -aI0 -aasS'testExamples' -p15 -(lsS'terminalNode' -p16 -I1 -sS'label' -p17 -I0 -sS'badExamples' -p18 -(lsS'level' -p19 -I1 -sS'parent' -p20 -NsS'name' -p21 -S'0' -sba(irdkit.ML.DecTree.DecTree -DecTreeNode -(dp22 -g3 -(lsg8 -F0 -sg9 -(lsg10 -(lp23 -(lp24 -I0 -aI2 -aI1 -aI0 -aI1 -aa(lp25 -I0 -aI1 -aI1 -aI1 -aI1 -aasg15 -(lsg16 -I1 -sg17 -I1 -sg18 -(lsg19 -I1 -sg20 -Nsg21 -S'1' -sbasg8 -F0.97095059445466858 -sg9 -(lsg10 -(lp26 -g12 -ag13 -ag14 -ag24 -ag25 -asg15 -(lsg16 -I0 -sg17 -I2 -sg18 -(lsg19 -I1 -sg20 -Nsg21 -S'Var: 2' -sba(irdkit.ML.DecTree.DecTree -DecTreeNode -(dp27 -g3 -(lsg8 -F0 -sg9 -(lsg10 -(lp28 -(lp29 -I1 -aI0 -aI0 -aI0 -aI1 -aa(lp30 -I1 -aI2 -aI1 -aI1 -aI1 -aa(lp31 -I1 -aI1 -aI0 -aI1 -aI1 -aa(lp32 -I1 -aI0 -aI1 -aI0 -aI1 -aasg15 -(lsg16 -I1 -sg17 -I1 -sg18 -(lsg19 -I1 -sg20 -Nsg21 -S'1' -sba(irdkit.ML.DecTree.DecTree -DecTreeNode -(dp33 -g3 -(lp34 -(irdkit.ML.DecTree.DecTree -DecTreeNode -(dp35 -g3 -(lsg8 -F0 -sg9 -(lsg10 -(lp36 -(lp37 -I2 -aI1 -aI0 -aI0 -aI1 -aa(lp38 -I2 -aI2 -aI1 -aI0 -aI1 -aa(lp39 -I2 -aI1 -aI1 -aI0 -aI1 -aasg15 -(lsg16 -I1 -sg17 -I1 -sg18 -(lsg19 -I1 -sg20 -Nsg21 -S'1' -sba(irdkit.ML.DecTree.DecTree -DecTreeNode -(dp40 -g3 -(lsg8 -F0 -sg9 -(lsg10 -(lp41 -(lp42 -I2 -aI2 -aI1 -aI1 -aI0 -aa(lp43 -I2 -aI1 -aI0 -aI1 -aI0 -aasg15 -(lsg16 -I1 -sg17 -I0 -sg18 -(lsg19 -I1 -sg20 -Nsg21 -S'0' -sbasg8 -F0.97095059445466858 -sg9 -(lsg10 -(lp44 -g37 -ag38 -ag42 -ag39 -ag43 -asg15 -(lsg16 -I0 -sg17 -I3 -sg18 -(lsg19 -I1 -sg20 -Nsg21 -S'Var: 3' -sbasg8 -F0.94028595867063092 -sg9 -(lsg10 -(lp45 -g12 -ag13 -ag29 -ag37 -ag38 -ag42 -ag30 -ag14 -ag24 -ag39 -ag25 -ag31 -ag32 -ag43 -asg15 -(lsg16 -I0 -sg17 -I0 -sg18 -(lsg19 -I0 -sg20 -Nsg21 -S'Var: 0' -sS'_nResultCodes' -p46 -I2 -sb. diff --git a/rdkit/ML/DecTree/test_data/CmpTree1.pkl b/rdkit/ML/DecTree/test_data/CmpTree1.pkl deleted file mode 100644 index 102f795f018..00000000000 --- a/rdkit/ML/DecTree/test_data/CmpTree1.pkl +++ /dev/null @@ -1,1015 +0,0 @@ -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -p1 -(dp2 -S'totNChildren' -p3 -I4 -sS'_nResultCodes' -p4 -I2 -sS'_bBox' -p5 -(I290 -I40 -I310 -I60 -tsS'testExamples' -p6 -(lp7 -(lp8 -S'Cr3Ru' -aF0.50762640931400005 -aF12.80469235 -aI1 -aI1 -aI0 -aF3.6748180389399998 -aI0 -aa(lp9 -S'CoIr' -aF0.72041093624399999 -aF12.4928697598 -aI1 -aI0 -aI1 -aF2.7561609745000002 -aI0 -aa(lp10 -S'Ir3V' -aF0.57768528130600005 -aF13.848370832500001 -aI1 -aI0 -aI1 -aF2.2877440452600002 -aI0 -aa(lp11 -S'Cr' -aF0.50234696502099996 -aF11.943936000000001 -aI1 -aI0 -aI0 -aF3.6748180389399998 -aI0 -aa(lp12 -S'Cr4Pt' -aF0.52196710148900005 -aF13.027640977000001 -aI1 -aI0 -aI1 -aF3.6748180389399998 -aI1 -aa(lp13 -S'MnPd3' -aF0.69217333302100004 -aF13.3637046657 -aI1 -aI1 -aI0 -aF4.2085700035100002 -aI1 -aa(lp14 -S'Cr2Hf' -aF0.17582690790399999 -aF30.332861999999999 -aI1 -aI0 -aI1 -aF3.6748180389399998 -aI0 -aa(lp15 -S'IrSc' -aF0.36415870162199998 -aF16.47633291 -aI1 -aI0 -aI1 -aF1.6517590284300001 -aI0 -aa(lp16 -S'HfPt' -aF0.41545163138500002 -aF16.849133500000001 -aI0 -aI0 -aI1 -aF1.0095700025600001 -aI0 -aa(lp17 -S'HfW2' -aF0.14637086733599999 -aF36.437123249999999 -aI0 -aI0 -aI1 -aF2.1384620666499998 -aI0 -aa(lp18 -S'Ir2Sc' -aF0.211811319911 -aF33.048280908300001 -aI1 -aI0 -aI1 -aF1.6517590284300001 -aI0 -aa(lp19 -S'IrV3' -aF0.43729933011299998 -aF13.7205789875 -aI1 -aI0 -aI1 -aF2.2877440452600002 -aI0 -aa(lp20 -S'FeNi3' -aF0.84758124223099995 -aF11.2083650825 -aI1 -aI0 -aI0 -aF3.5345430374100002 -aI1 -aa(lp21 -S'FePt' -aF0.63528643082000003 -aF14.1668380803 -aI1 -aI0 -aI1 -aF3.5345430374100002 -aI1 -aa(lp22 -S'Mo' -aF0.38392782156999999 -aF15.6279375 -aI0 -aI1 -aI0 -aF2.9241099357599998 -aI0 -aa(lp23 -S'MnPd' -aF0.57281499373 -aF14.838997046199999 -aI1 -aI1 -aI0 -aF4.2085700035100002 -aI1 -aa(lp24 -S'Ir3Nb' -aF0.54237138330800005 -aF14.750040739999999 -aI0 -aI1 -aI1 -aF2.3222479820299999 -aI0 -aa(lp25 -S'W' -aF0.38029445946500001 -aF15.777248 -aI0 -aI0 -aI1 -aF2.1384620666499998 -aI0 -aa(lp26 -S'HfIr3' -aF0.51228401899599996 -aF15.128326695 -aI0 -aI0 -aI1 -aF1.6517590284300001 -aI0 -aa(lp27 -S'Fe16Rh' -aF0.66885138772300001 -aF12.0487505556 -aI1 -aI1 -aI0 -aF3.5345430374100002 -aI1 -aa(lp28 -S'Cr3Ir' -aF0.52512775650900001 -aF12.8540148875 -aI1 -aI0 -aI1 -aF3.6748180389399998 -aI0 -aasS'parent' -p29 -NsS'level' -p30 -I0 -sS'qBounds' -p31 -(lp32 -F0.51934236764949993 -asS'badExamples' -p33 -(lp34 -(lp35 -S'NiPd' -aF0.75248259056300004 -aF13.289344 -aI1 -aI1 -aI0 -aF1.89935600758 -aI1 -aa(lp36 -S'NiPt' -aF0.76294022101600001 -aF13.1071868078 -aI1 -aI0 -aI1 -aF1.89935600758 -aI1 -aa(lp37 -S'Co3Pt' -aF0.74974528469799995 -aF12.337523407999999 -aI1 -aI0 -aI1 -aF2.7561609745000002 -aI1 -aag28 -a(lp38 -S'CoPt' -aF0.71123755334299998 -aF13.356999999999999 -aI1 -aI0 -aI1 -aF2.7561609745000002 -aI1 -aasS'nExamples' -p39 -I51 -sS'label' -p40 -I1 -sS'terminalNode' -p41 -I0 -sS'trainingExamples' -p42 -(lp43 -(lp44 -S'CrPt3' -aF0.61966934164099996 -aF14.523874904299999 -aI1 -aI0 -aI1 -aF3.6748180389399998 -aI1 -aa(lp45 -S'Co4Os' -aF0.77363839477700003 -aF11.37482325 -aI1 -aI0 -aI1 -aF2.7561609745000002 -aI1 -aa(lp46 -S'V' -aF0.363060641874 -aF13.771803999999999 -aI1 -aI0 -aI0 -aF2.2877440452600002 -aI0 -aa(lp47 -S'FePt3' -aF0.65460233585500005 -aF14.5126277125 -aI1 -aI0 -aI1 -aF3.5345430374100002 -aI1 -aa(lp48 -S'HfRh3' -aF0.51820099910899997 -aF14.955586757500001 -aI0 -aI1 -aI1 -aF1.3449230194099999 -aI0 -aa(lp49 -S'Cr3Os' -aF0.50600252550699998 -aF12.845785687499999 -aI1 -aI0 -aI1 -aF3.6748180389399998 -aI0 -aa(lp50 -S'Ir2Y' -aF0.197054630259 -aF35.523143966699998 -aI0 -aI1 -aI1 -aF1.6517590284300001 -aI0 -aa(lp51 -S'CoOs' -aF0.68064545647499997 -aF12.488146242899999 -aI1 -aI0 -aI1 -aF2.7561609745000002 -aI0 -aa(lp52 -S'Rh' -aF0.65607231374800001 -aF13.718 -aI0 -aI1 -aI0 -aF1.3449230194099999 -aI0 -aa(lp53 -S'CoSc' -aF0.38576186851799998 -aF15.553636815000001 -aI1 -aI0 -aI0 -aF2.7561609745000002 -aI0 -aa(lp54 -S'Cr2Ta' -aF0.20160161710999999 -aF28.108240141700001 -aI1 -aI0 -aI1 -aF3.6748180389399998 -aI0 -aa(lp55 -S'CrPt3' -aF0.619669341609 -aF14.523874905 -aI1 -aI0 -aI1 -aF3.6748180389399998 -aI1 -aa(lp56 -S'CoTi2' -aF0.22308158424499999 -aF25.401768083299999 -aI1 -aI0 -aI0 -aF2.7561609745000002 -aI0 -aa(lp57 -S'Mn2Pt3' -aF0.59614900292999995 -aF14.761410246000001 -aI1 -aI0 -aI1 -aF4.2085700035100002 -aI1 -aa(lp58 -S'HfOs' -aF0.35314121102500001 -aF16.990370460000001 -aI0 -aI0 -aI1 -aF2.1916060447699999 -aI0 -aa(lp59 -S'Co4Ru' -aF0.76717348355500004 -aF11.470677999999999 -aI1 -aI1 -aI0 -aF2.7561609745000002 -aI1 -aa(lp60 -S'Cr2Ti' -aF0.190316268905 -aF28.023528225 -aI1 -aI0 -aI0 -aF3.6748180389399998 -aI0 -aa(lp61 -S'CoRu' -aF0.69719873105899999 -aF12.1916458269 -aI1 -aI1 -aI0 -aF2.7561609745000002 -aI0 -aa(lp62 -S'Fe3Rh7' -aF0.64858637129100005 -aF13.4137878702 -aI1 -aI1 -aI0 -aF3.5345430374100002 -aI1 -aa(lp63 -S'Pd' -aF0.67953399393400005 -aF14.71596725 -aI0 -aI1 -aI0 -aF0 -aI0 -aag35 -a(lp64 -S'CrPt' -aF0.52932719092500002 -aF15.113525504 -aI1 -aI0 -aI1 -aF3.6748180389399998 -aI1 -aa(lp65 -S'Co3Ir7' -aF0.74435726980799999 -aF12.090968094300001 -aI1 -aI0 -aI1 -aF2.7561609745000002 -aI0 -aa(lp66 -S'Cr3Pt' -aF0.52048373618999999 -aF13.449027343699999 -aI1 -aI0 -aI1 -aF3.6748180389399998 -aI1 -aa(lp67 -S'CoZr' -aF0.39896850829199998 -aF16.29201269 -aI1 -aI1 -aI0 -aF2.7561609745000002 -aI0 -aa(lp68 -S'Ir3Ta' -aF0.54530764893100003 -aF14.670617614999999 -aI0 -aI0 -aI1 -aF1.6517590284300001 -aI0 -aa(lp69 -S'Fe3Pt' -aF0.65516761432500004 -aF12.97377925 -aI1 -aI0 -aI1 -aF3.5345430374100002 -aI1 -aa(lp70 -S'FeRh' -aF0.63660665777900005 -aF13.3520438345 -aI1 -aI1 -aI0 -aF3.5345430374100002 -aI1 -aa(lp71 -S'Fe3Pd' -aF0.61090143443599998 -aF13.9138648575 -aI1 -aI1 -aI0 -aF3.5345430374100002 -aI1 -aa(lp72 -S'Ni' -aF0.91713232531900002 -aF10.903551999999999 -aI1 -aI0 -aI0 -aF1.89935600758 -aI1 -aa(lp73 -S'Co3Rh' -aF0.75038022309600005 -aF11.9939195131 -aI1 -aI1 -aI0 -aF2.7561609745000002 -aI1 -aa(lp74 -S'Co4Rh' -aF0.77298002729199999 -aF11.6432503845 -aI1 -aI1 -aI0 -aF2.7561609745000002 -aI1 -aa(lp75 -S'Fe13Pt7' -aF0.64483495005400004 -aF13.491824534699999 -aI1 -aI0 -aI1 -aF3.5345430374100002 -aI1 -aa(lp76 -S'Fe9Rh' -aF0.66471510517099996 -aF12.1856716313 -aI1 -aI1 -aI0 -aF3.5345430374100002 -aI1 -aag36 -a(lp77 -S'NiRh' -aF0.75971012422799999 -aF12.504769512799999 -aI1 -aI1 -aI0 -aF1.89935600758 -aI0 -aa(lp78 -S'MnPt' -aF0.57901907356899995 -aF14.68 -aI1 -aI0 -aI1 -aF4.2085700035100002 -aI1 -aa(lp79 -S'Cr2Zr' -aF0.17040054037399999 -aF31.298805283299998 -aI1 -aI1 -aI0 -aF3.6748180389399998 -aI0 -aa(lp80 -S'Cr2Nb' -aF0.20256037807300001 -aF27.975197916700001 -aI1 -aI1 -aI0 -aF3.6748180389399998 -aI0 -aa(lp81 -S'IrNb3' -aF0.35471017322300002 -aF16.915218262500002 -aI0 -aI1 -aI1 -aF2.3222479820299999 -aI0 -aa(lp82 -S'HfNi2' -aF0.29146842287500002 -aF27.447227116699999 -aI1 -aI0 -aI1 -aF1.89935600758 -aI0 -aa(lp83 -S'AlFe3' -aF0.55352822993999995 -aF12.1945 -aI1 -aI0 -aI0 -aF3.5345430374100002 -aI1 -aa(lp84 -S'FePd26' -aF0.67590684743700002 -aF14.6853460112 -aI1 -aI1 -aI0 -aF3.5345430374100002 -aI1 -aa(lp85 -S'Fe' -aF0.67682172807600005 -aF11.8199515 -aI1 -aI0 -aI0 -aF3.5345430374100002 -aI1 -aag37 -a(lp86 -S'HfMo2' -aF0.14900548028999999 -aF35.792866966699997 -aI0 -aI1 -aI1 -aF2.9241099357599998 -aI0 -aa(lp87 -S'CoV3' -aF0.46978202072199998 -aF12.7718808625 -aI1 -aI0 -aI0 -aF2.7561609745000002 -aI0 -aag38 -a(lp88 -S'CoTi' -aF0.483896935837 -aF13.43261244 -aI1 -aI0 -aI0 -aF2.7561609745000002 -aI0 -aa(lp89 -S'FeCo' -aF0.73405988833699998 -aF11.579436684999999 -aI1 -aI0 -aI0 -aF3.5345430374100002 -aI1 -aa(lp90 -S'HfTc' -aF0.31459326965500001 -aF17.482891500000001 -aI0 -aI1 -aI1 -aF2.9957029819500001 -aI0 -aasS'examples' -p91 -g43 -sS'_gridName' -p92 -S'Model 2' -sS'_varNames' -p93 -(lp94 -S'COMPOUND' -aS'ELCONC' -p95 -aS'ATVOL' -p96 -aS'HAS3D' -p97 -aS'HAS4D' -p98 -aS'HAS5D' -p99 -aS'MAX_DED' -p100 -aS'ISFERROMAGNETIC' -asS'data' -p101 -F0.51732874925004757 -sS'children' -p102 -(lp103 -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp104 -g3 -I1 -sg5 -(I260 -I90 -I280 -I110 -tsg6 -(lsg29 -Nsg30 -I1 -sg31 -(lsg33 -(lsg39 -I18 -sg40 -I0 -sg41 -I1 -sg42 -(lsg91 -(lp105 -g46 -ag48 -ag49 -ag50 -ag53 -ag54 -ag56 -ag58 -ag60 -ag67 -ag79 -ag80 -ag81 -ag82 -ag86 -ag87 -ag88 -ag90 -asg101 -I-666 -sg102 -(lsS'name' -p106 -S'0' -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp107 -g3 -I3 -sg5 -(I300 -I90 -I320 -I110 -tsg6 -(lsg29 -Nsg30 -I1 -sg31 -(lp108 -F3.145352005955 -asg33 -(lsg39 -I33 -sg40 -I6 -sg41 -I0 -sg42 -(lsg91 -(lp109 -g44 -ag45 -ag47 -ag51 -ag52 -ag55 -ag57 -ag59 -ag61 -ag62 -ag63 -ag35 -ag64 -ag65 -ag66 -ag68 -ag69 -ag70 -ag71 -ag72 -ag73 -ag74 -ag75 -ag76 -ag36 -ag77 -ag78 -ag83 -ag84 -ag85 -ag37 -ag38 -ag89 -asg101 -I-666 -sg102 -(lp110 -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp111 -g3 -I2 -sg5 -(I290 -I140 -I310 -I160 -tsg6 -(lsg29 -Nsg30 -I1 -sg31 -(lp112 -F12.042443803699999 -asg33 -(lsg39 -I16 -sg40 -I2 -sg41 -I0 -sg42 -(lsg91 -(lp113 -g45 -ag51 -ag52 -ag59 -ag61 -ag63 -ag35 -ag65 -ag68 -ag72 -ag73 -ag74 -ag36 -ag77 -ag37 -ag38 -asg101 -I-666 -sg102 -(lp114 -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp115 -g3 -I1 -sg5 -(I280 -I190 -I300 -I210 -tsg6 -(lsg29 -Nsg30 -I1 -sg31 -(lsg33 -(lsg39 -I5 -sg40 -I1 -sg41 -I1 -sg42 -(lsg91 -(lp116 -g45 -ag59 -ag72 -ag73 -ag74 -asg101 -I-666 -sg102 -(lsg106 -S'1' -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp117 -g3 -I1 -sg5 -(I300 -I190 -I320 -I210 -tsg6 -(lsg29 -Nsg30 -I1 -sg31 -(lsg33 -(lsg39 -I11 -sg40 -I0 -sg41 -I1 -sg42 -(lsg91 -(lp118 -g51 -ag52 -ag61 -ag63 -ag35 -ag65 -ag68 -ag36 -ag77 -ag37 -ag38 -asg101 -I-666 -sg102 -(lsg106 -S'0?' -sbasg106 -g96 -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp119 -g3 -I1 -sg5 -(I320 -I140 -I340 -I160 -tsg6 -(lsg29 -Nsg30 -I1 -sg31 -(lsg33 -(lsg39 -I17 -sg40 -I1 -sg41 -I1 -sg42 -(lsg91 -(lp120 -g44 -ag47 -ag55 -ag57 -ag62 -ag64 -ag66 -ag69 -ag70 -ag71 -ag75 -ag76 -ag78 -ag83 -ag84 -ag85 -ag89 -asg101 -I-666 -sg102 -(lsg106 -S'1' -sbasg106 -g100 -sbasg106 -g95 -sb. diff --git a/rdkit/ML/DecTree/test_data/CmpTree2.pkl b/rdkit/ML/DecTree/test_data/CmpTree2.pkl deleted file mode 100644 index 397bde58a26..00000000000 --- a/rdkit/ML/DecTree/test_data/CmpTree2.pkl +++ /dev/null @@ -1,1011 +0,0 @@ -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -p1 -(dp2 -S'totNChildren' -p3 -I4 -sS'_nResultCodes' -p4 -I2 -sS'_bBox' -p5 -(I290 -I40 -I310 -I60 -tsS'testExamples' -p6 -(lp7 -(lp8 -S'FePt3' -aF0.65460233585500005 -aF14.5126277125 -aI1 -aI0 -aI1 -aF3.5345430374100002 -aI1 -aa(lp9 -S'Cr3Ru' -aF0.50762640931400005 -aF12.80469235 -aI1 -aI1 -aI0 -aF3.6748180389399998 -aI0 -aa(lp10 -S'Rh' -aF0.65607231374800001 -aF13.718 -aI0 -aI1 -aI0 -aF1.3449230194099999 -aI0 -aa(lp11 -S'IrSc' -aF0.36415870162199998 -aF16.47633291 -aI1 -aI0 -aI1 -aF1.6517590284300001 -aI0 -aa(lp12 -S'HfOs' -aF0.35314121102500001 -aF16.990370460000001 -aI0 -aI0 -aI1 -aF2.1916060447699999 -aI0 -aa(lp13 -S'CoRu' -aF0.69719873105899999 -aF12.1916458269 -aI1 -aI1 -aI0 -aF2.7561609745000002 -aI0 -aa(lp14 -S'HfW2' -aF0.14637086733599999 -aF36.437123249999999 -aI0 -aI0 -aI1 -aF2.1384620666499998 -aI0 -aa(lp15 -S'Cr3Pt' -aF0.52048373618999999 -aF13.449027343699999 -aI1 -aI0 -aI1 -aF3.6748180389399998 -aI1 -aa(lp16 -S'Fe3Pt' -aF0.65516761432500004 -aF12.97377925 -aI1 -aI0 -aI1 -aF3.5345430374100002 -aI1 -aa(lp17 -S'FeRh' -aF0.63660665777900005 -aF13.3520438345 -aI1 -aI1 -aI0 -aF3.5345430374100002 -aI1 -aa(lp18 -S'Fe13Pt7' -aF0.64483495005400004 -aF13.491824534699999 -aI1 -aI0 -aI1 -aF3.5345430374100002 -aI1 -aa(lp19 -S'NiPt' -aF0.76294022101600001 -aF13.1071868078 -aI1 -aI0 -aI1 -aF1.89935600758 -aI1 -aa(lp20 -S'Mo' -aF0.38392782156999999 -aF15.6279375 -aI0 -aI1 -aI0 -aF2.9241099357599998 -aI0 -aa(lp21 -S'Cr2Nb' -aF0.20256037807300001 -aF27.975197916700001 -aI1 -aI1 -aI0 -aF3.6748180389399998 -aI0 -aa(lp22 -S'IrNb3' -aF0.35471017322300002 -aF16.915218262500002 -aI0 -aI1 -aI1 -aF2.3222479820299999 -aI0 -aa(lp23 -S'FePd26' -aF0.67590684743700002 -aF14.6853460112 -aI1 -aI1 -aI0 -aF3.5345430374100002 -aI1 -aa(lp24 -S'Ir3Nb' -aF0.54237138330800005 -aF14.750040739999999 -aI0 -aI1 -aI1 -aF2.3222479820299999 -aI0 -aa(lp25 -S'Cr3Ir' -aF0.52512775650900001 -aF12.8540148875 -aI1 -aI0 -aI1 -aF3.6748180389399998 -aI0 -aa(lp26 -S'CoPt' -aF0.71123755334299998 -aF13.356999999999999 -aI1 -aI0 -aI1 -aF2.7561609745000002 -aI1 -aa(lp27 -S'FeCo' -aF0.73405988833699998 -aF11.579436684999999 -aI1 -aI0 -aI0 -aF3.5345430374100002 -aI1 -aasS'parent' -p28 -NsS'level' -p29 -I0 -sS'qBounds' -p30 -(lp31 -F0.52008405029900007 -asS'badExamples' -p32 -(lp33 -(lp34 -S'NiPd' -aF0.75248259056300004 -aF13.289344 -aI1 -aI1 -aI0 -aF1.89935600758 -aI1 -aag19 -a(lp35 -S'Co3Pt' -aF0.74974528469799995 -aF12.337523407999999 -aI1 -aI0 -aI1 -aF2.7561609745000002 -aI1 -aag25 -ag26 -asS'nExamples' -p36 -I52 -sS'label' -p37 -I1 -sS'terminalNode' -p38 -I0 -sS'trainingExamples' -p39 -(lp40 -(lp41 -S'CrPt3' -aF0.61966934164099996 -aF14.523874904299999 -aI1 -aI0 -aI1 -aF3.6748180389399998 -aI1 -aa(lp42 -S'Co4Os' -aF0.77363839477700003 -aF11.37482325 -aI1 -aI0 -aI1 -aF2.7561609745000002 -aI1 -aa(lp43 -S'V' -aF0.363060641874 -aF13.771803999999999 -aI1 -aI0 -aI0 -aF2.2877440452600002 -aI0 -aa(lp44 -S'HfRh3' -aF0.51820099910899997 -aF14.955586757500001 -aI0 -aI1 -aI1 -aF1.3449230194099999 -aI0 -aa(lp45 -S'Cr3Os' -aF0.50600252550699998 -aF12.845785687499999 -aI1 -aI0 -aI1 -aF3.6748180389399998 -aI0 -aa(lp46 -S'Ir2Y' -aF0.197054630259 -aF35.523143966699998 -aI0 -aI1 -aI1 -aF1.6517590284300001 -aI0 -aa(lp47 -S'CoOs' -aF0.68064545647499997 -aF12.488146242899999 -aI1 -aI0 -aI1 -aF2.7561609745000002 -aI0 -aa(lp48 -S'CoIr' -aF0.72041093624399999 -aF12.4928697598 -aI1 -aI0 -aI1 -aF2.7561609745000002 -aI0 -aa(lp49 -S'Ir3V' -aF0.57768528130600005 -aF13.848370832500001 -aI1 -aI0 -aI1 -aF2.2877440452600002 -aI0 -aa(lp50 -S'Cr' -aF0.50234696502099996 -aF11.943936000000001 -aI1 -aI0 -aI0 -aF3.6748180389399998 -aI0 -aa(lp51 -S'Cr4Pt' -aF0.52196710148900005 -aF13.027640977000001 -aI1 -aI0 -aI1 -aF3.6748180389399998 -aI1 -aa(lp52 -S'CoSc' -aF0.38576186851799998 -aF15.553636815000001 -aI1 -aI0 -aI0 -aF2.7561609745000002 -aI0 -aa(lp53 -S'Cr2Ta' -aF0.20160161710999999 -aF28.108240141700001 -aI1 -aI0 -aI1 -aF3.6748180389399998 -aI0 -aa(lp54 -S'CrPt3' -aF0.619669341609 -aF14.523874905 -aI1 -aI0 -aI1 -aF3.6748180389399998 -aI1 -aa(lp55 -S'MnPd3' -aF0.69217333302100004 -aF13.3637046657 -aI1 -aI1 -aI0 -aF4.2085700035100002 -aI1 -aa(lp56 -S'Cr2Hf' -aF0.17582690790399999 -aF30.332861999999999 -aI1 -aI0 -aI1 -aF3.6748180389399998 -aI0 -aa(lp57 -S'CoTi2' -aF0.22308158424499999 -aF25.401768083299999 -aI1 -aI0 -aI0 -aF2.7561609745000002 -aI0 -aa(lp58 -S'Mn2Pt3' -aF0.59614900292999995 -aF14.761410246000001 -aI1 -aI0 -aI1 -aF4.2085700035100002 -aI1 -aa(lp59 -S'HfPt' -aF0.41545163138500002 -aF16.849133500000001 -aI0 -aI0 -aI1 -aF1.0095700025600001 -aI0 -aa(lp60 -S'Co4Ru' -aF0.76717348355500004 -aF11.470677999999999 -aI1 -aI1 -aI0 -aF2.7561609745000002 -aI1 -aa(lp61 -S'Cr2Ti' -aF0.190316268905 -aF28.023528225 -aI1 -aI0 -aI0 -aF3.6748180389399998 -aI0 -aa(lp62 -S'Fe3Rh7' -aF0.64858637129100005 -aF13.4137878702 -aI1 -aI1 -aI0 -aF3.5345430374100002 -aI1 -aa(lp63 -S'Pd' -aF0.67953399393400005 -aF14.71596725 -aI0 -aI1 -aI0 -aF0 -aI0 -aag34 -a(lp64 -S'CrPt' -aF0.52932719092500002 -aF15.113525504 -aI1 -aI0 -aI1 -aF3.6748180389399998 -aI1 -aa(lp65 -S'Ir2Sc' -aF0.211811319911 -aF33.048280908300001 -aI1 -aI0 -aI1 -aF1.6517590284300001 -aI0 -aa(lp66 -S'Co3Ir7' -aF0.74435726980799999 -aF12.090968094300001 -aI1 -aI0 -aI1 -aF2.7561609745000002 -aI0 -aa(lp67 -S'CoZr' -aF0.39896850829199998 -aF16.29201269 -aI1 -aI1 -aI0 -aF2.7561609745000002 -aI0 -aa(lp68 -S'IrV3' -aF0.43729933011299998 -aF13.7205789875 -aI1 -aI0 -aI1 -aF2.2877440452600002 -aI0 -aa(lp69 -S'Ir3Ta' -aF0.54530764893100003 -aF14.670617614999999 -aI0 -aI0 -aI1 -aF1.6517590284300001 -aI0 -aa(lp70 -S'FeNi3' -aF0.84758124223099995 -aF11.2083650825 -aI1 -aI0 -aI0 -aF3.5345430374100002 -aI1 -aa(lp71 -S'Fe3Pd' -aF0.61090143443599998 -aF13.9138648575 -aI1 -aI1 -aI0 -aF3.5345430374100002 -aI1 -aa(lp72 -S'Ni' -aF0.91713232531900002 -aF10.903551999999999 -aI1 -aI0 -aI0 -aF1.89935600758 -aI1 -aa(lp73 -S'Co3Rh' -aF0.75038022309600005 -aF11.9939195131 -aI1 -aI1 -aI0 -aF2.7561609745000002 -aI1 -aa(lp74 -S'Co4Rh' -aF0.77298002729199999 -aF11.6432503845 -aI1 -aI1 -aI0 -aF2.7561609745000002 -aI1 -aa(lp75 -S'Fe9Rh' -aF0.66471510517099996 -aF12.1856716313 -aI1 -aI1 -aI0 -aF3.5345430374100002 -aI1 -aa(lp76 -S'FePt' -aF0.63528643082000003 -aF14.1668380803 -aI1 -aI0 -aI1 -aF3.5345430374100002 -aI1 -aa(lp77 -S'NiRh' -aF0.75971012422799999 -aF12.504769512799999 -aI1 -aI1 -aI0 -aF1.89935600758 -aI0 -aa(lp78 -S'MnPt' -aF0.57901907356899995 -aF14.68 -aI1 -aI0 -aI1 -aF4.2085700035100002 -aI1 -aa(lp79 -S'MnPd' -aF0.57281499373 -aF14.838997046199999 -aI1 -aI1 -aI0 -aF4.2085700035100002 -aI1 -aa(lp80 -S'Cr2Zr' -aF0.17040054037399999 -aF31.298805283299998 -aI1 -aI1 -aI0 -aF3.6748180389399998 -aI0 -aa(lp81 -S'HfNi2' -aF0.29146842287500002 -aF27.447227116699999 -aI1 -aI0 -aI1 -aF1.89935600758 -aI0 -aa(lp82 -S'AlFe3' -aF0.55352822993999995 -aF12.1945 -aI1 -aI0 -aI0 -aF3.5345430374100002 -aI1 -aa(lp83 -S'Fe' -aF0.67682172807600005 -aF11.8199515 -aI1 -aI0 -aI0 -aF3.5345430374100002 -aI1 -aa(lp84 -S'W' -aF0.38029445946500001 -aF15.777248 -aI0 -aI0 -aI1 -aF2.1384620666499998 -aI0 -aag35 -a(lp85 -S'HfIr3' -aF0.51228401899599996 -aF15.128326695 -aI0 -aI0 -aI1 -aF1.6517590284300001 -aI0 -aa(lp86 -S'HfMo2' -aF0.14900548028999999 -aF35.792866966699997 -aI0 -aI1 -aI1 -aF2.9241099357599998 -aI0 -aa(lp87 -S'CoV3' -aF0.46978202072199998 -aF12.7718808625 -aI1 -aI0 -aI0 -aF2.7561609745000002 -aI0 -aa(lp88 -S'Fe16Rh' -aF0.66885138772300001 -aF12.0487505556 -aI1 -aI1 -aI0 -aF3.5345430374100002 -aI1 -aa(lp89 -S'CoTi' -aF0.483896935837 -aF13.43261244 -aI1 -aI0 -aI0 -aF2.7561609745000002 -aI0 -aa(lp90 -S'HfTc' -aF0.31459326965500001 -aF17.482891500000001 -aI0 -aI1 -aI1 -aF2.9957029819500001 -aI0 -aasS'examples' -p91 -g40 -sS'_gridName' -p92 -S'Model 3' -sS'_varNames' -p93 -(lp94 -S'COMPOUND' -aS'ELCONC' -p95 -aS'ATVOL' -p96 -aS'HAS3D' -p97 -aS'HAS4D' -p98 -aS'HAS5D' -p99 -aS'MAX_DED' -p100 -aS'ISFERROMAGNETIC' -asS'data' -p101 -F0.53819582828432444 -sS'children' -p102 -(lp103 -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp104 -g3 -I1 -sg5 -(I260 -I90 -I280 -I110 -tsg6 -(lsg28 -Nsg29 -I1 -sg30 -(lsg32 -(lsg36 -I22 -sg37 -I0 -sg38 -I1 -sg39 -(lsg91 -(lp105 -g43 -ag44 -ag45 -ag46 -ag50 -ag52 -ag53 -ag56 -ag57 -ag59 -ag61 -ag65 -ag67 -ag68 -ag80 -ag81 -ag84 -ag85 -ag86 -ag87 -ag89 -ag90 -asg101 -I-666 -sg102 -(lsS'name' -p106 -S'0' -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp107 -g3 -I3 -sg5 -(I300 -I90 -I320 -I110 -tsg6 -(lsg28 -Nsg29 -I1 -sg30 -(lp108 -F3.145352005955 -asg32 -(lsg36 -I30 -sg37 -I6 -sg38 -I0 -sg39 -(lsg91 -(lp109 -g41 -ag42 -ag47 -ag48 -ag49 -ag51 -ag54 -ag55 -ag58 -ag60 -ag62 -ag63 -ag34 -ag64 -ag66 -ag69 -ag70 -ag71 -ag72 -ag73 -ag74 -ag75 -ag76 -ag77 -ag78 -ag79 -ag82 -ag83 -ag35 -ag88 -asg101 -I-666 -sg102 -(lp110 -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp111 -g3 -I2 -sg5 -(I290 -I140 -I310 -I160 -tsg6 -(lsg28 -Nsg29 -I1 -sg30 -(lp112 -F12.042443803699999 -asg32 -(lsg36 -I14 -sg37 -I2 -sg38 -I0 -sg39 -(lsg91 -(lp113 -g42 -ag47 -ag48 -ag49 -ag60 -ag63 -ag34 -ag66 -ag69 -ag72 -ag73 -ag74 -ag77 -ag35 -asg101 -I-666 -sg102 -(lp114 -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp115 -g3 -I1 -sg5 -(I280 -I190 -I300 -I210 -tsg6 -(lsg28 -Nsg29 -I1 -sg30 -(lsg32 -(lsg36 -I5 -sg37 -I1 -sg38 -I1 -sg39 -(lsg91 -(lp116 -g42 -ag60 -ag72 -ag73 -ag74 -asg101 -I-666 -sg102 -(lsg106 -S'1' -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp117 -g3 -I1 -sg5 -(I300 -I190 -I320 -I210 -tsg6 -(lsg28 -Nsg29 -I1 -sg30 -(lsg32 -(lsg36 -I9 -sg37 -I0 -sg38 -I1 -sg39 -(lsg91 -(lp118 -g47 -ag48 -ag49 -ag63 -ag34 -ag66 -ag69 -ag77 -ag35 -asg101 -I-666 -sg102 -(lsg106 -S'0?' -sbasg106 -g96 -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp119 -g3 -I1 -sg5 -(I320 -I140 -I340 -I160 -tsg6 -(lsg28 -Nsg29 -I1 -sg30 -(lsg32 -(lsg36 -I16 -sg37 -I1 -sg38 -I1 -sg39 -(lsg91 -(lp120 -g41 -ag51 -ag54 -ag55 -ag58 -ag62 -ag64 -ag70 -ag71 -ag75 -ag76 -ag78 -ag79 -ag82 -ag83 -ag88 -asg101 -I-666 -sg102 -(lsg106 -S'1' -sbasg106 -g100 -sbasg106 -g95 -sb. diff --git a/rdkit/ML/DecTree/test_data/MultiTree.pkl b/rdkit/ML/DecTree/test_data/MultiTree.pkl deleted file mode 100644 index 31c7775d5d9..00000000000 --- a/rdkit/ML/DecTree/test_data/MultiTree.pkl +++ /dev/null @@ -1,341 +0,0 @@ -(irdkit.ML.DecTree.DecTree -DecTreeNode -p1 -(dp2 -S'children' -p3 -(lp4 -(irdkit.ML.DecTree.DecTree -DecTreeNode -(dp5 -g3 -(lp6 -(irdkit.ML.DecTree.DecTree -DecTreeNode -(dp7 -g3 -(lp8 -(irdkit.ML.DecTree.DecTree -DecTreeNode -(dp9 -g3 -(lsS'data' -p10 -F0 -sS'trainingExamples' -p11 -(lsS'examples' -p12 -(lp13 -(lp14 -I0 -aI0 -aI0 -aI1 -aasS'testExamples' -p15 -(lsS'terminalNode' -p16 -I1 -sS'label' -p17 -I1 -sS'badExamples' -p18 -(lsS'level' -p19 -I1 -sS'parent' -p20 -NsS'name' -p21 -S'1' -sba(irdkit.ML.DecTree.DecTree -DecTreeNode -(dp22 -g3 -(lsg10 -F0 -sg11 -(lsg12 -(lp23 -(lp24 -I0 -aI0 -aI1 -aI2 -aasg15 -(lsg16 -I1 -sg17 -I2 -sg18 -(lsg19 -I1 -sg20 -Nsg21 -S'2' -sbasg10 -F1 -sg11 -(lsg12 -(lp25 -g14 -ag24 -asg15 -(lsg16 -I0 -sg17 -I2 -sg18 -(lsg19 -I1 -sg20 -Nsg21 -S'Var: 2' -sba(irdkit.ML.DecTree.DecTree -DecTreeNode -(dp26 -g3 -(lsg10 -F0 -sg11 -(lsg12 -(lp27 -(lp28 -I1 -aI0 -aI0 -aI2 -aa(lp29 -I1 -aI0 -aI1 -aI2 -aasg15 -(lsg16 -I1 -sg17 -I2 -sg18 -(lsg19 -I1 -sg20 -Nsg21 -S'2' -sbasg10 -F0.81127812445913283 -sg11 -(lsg12 -(lp30 -g14 -ag24 -ag28 -ag29 -asg15 -(lsg16 -I0 -sg17 -I0 -sg18 -(lsg19 -I1 -sg20 -Nsg21 -S'Var: 0' -sba(irdkit.ML.DecTree.DecTree -DecTreeNode -(dp31 -g3 -(lp32 -(irdkit.ML.DecTree.DecTree -DecTreeNode -(dp33 -g3 -(lp34 -(irdkit.ML.DecTree.DecTree -DecTreeNode -(dp35 -g3 -(lsg10 -F0 -sg11 -(lsg12 -(lp36 -(lp37 -I0 -aI1 -aI0 -aI0 -aasg15 -(lsg16 -I1 -sg17 -I0 -sg18 -(lsg19 -I1 -sg20 -Nsg21 -S'0' -sba(irdkit.ML.DecTree.DecTree -DecTreeNode -(dp38 -g3 -(lsg10 -F0 -sg11 -(lsg12 -(lp39 -(lp40 -I0 -aI1 -aI1 -aI2 -aasg15 -(lsg16 -I1 -sg17 -I2 -sg18 -(lsg19 -I1 -sg20 -Nsg21 -S'2' -sbasg10 -F1 -sg11 -(lsg12 -(lp41 -g37 -ag40 -asg15 -(lsg16 -I0 -sg17 -I2 -sg18 -(lsg19 -I1 -sg20 -Nsg21 -S'Var: 2' -sba(irdkit.ML.DecTree.DecTree -DecTreeNode -(dp42 -g3 -(lp43 -(irdkit.ML.DecTree.DecTree -DecTreeNode -(dp44 -g3 -(lsg10 -F0 -sg11 -(lsg12 -(lp45 -(lp46 -I1 -aI1 -aI0 -aI2 -aasg15 -(lsg16 -I1 -sg17 -I2 -sg18 -(lsg19 -I1 -sg20 -Nsg21 -S'2' -sba(irdkit.ML.DecTree.DecTree -DecTreeNode -(dp47 -g3 -(lsg10 -F0 -sg11 -(lsg12 -(lp48 -(lp49 -I1 -aI1 -aI1 -aI0 -aasg15 -(lsg16 -I1 -sg17 -I0 -sg18 -(lsg19 -I1 -sg20 -Nsg21 -S'0' -sbasg10 -F1 -sg11 -(lsg12 -(lp50 -g46 -ag49 -asg15 -(lsg16 -I0 -sg17 -I2 -sg18 -(lsg19 -I1 -sg20 -Nsg21 -S'Var: 2' -sbasg10 -F1 -sg11 -(lsg12 -(lp51 -g37 -ag40 -ag46 -ag49 -asg15 -(lsg16 -I0 -sg17 -I0 -sg18 -(lsg19 -I1 -sg20 -Nsg21 -S'Var: 0' -sbasg10 -F1.2987949406953985 -sg11 -(lsg12 -(lp52 -g37 -ag14 -ag24 -ag40 -ag28 -ag29 -ag46 -ag49 -asg15 -(lsg16 -I0 -sg17 -I1 -sg18 -(lsg19 -I0 -sg20 -Nsg21 -S'Var: 1' -sb. diff --git a/rdkit/ML/DecTree/test_data/MultiTreeRes.pkl b/rdkit/ML/DecTree/test_data/MultiTreeRes.pkl deleted file mode 100644 index 31c7775d5d9..00000000000 --- a/rdkit/ML/DecTree/test_data/MultiTreeRes.pkl +++ /dev/null @@ -1,341 +0,0 @@ -(irdkit.ML.DecTree.DecTree -DecTreeNode -p1 -(dp2 -S'children' -p3 -(lp4 -(irdkit.ML.DecTree.DecTree -DecTreeNode -(dp5 -g3 -(lp6 -(irdkit.ML.DecTree.DecTree -DecTreeNode -(dp7 -g3 -(lp8 -(irdkit.ML.DecTree.DecTree -DecTreeNode -(dp9 -g3 -(lsS'data' -p10 -F0 -sS'trainingExamples' -p11 -(lsS'examples' -p12 -(lp13 -(lp14 -I0 -aI0 -aI0 -aI1 -aasS'testExamples' -p15 -(lsS'terminalNode' -p16 -I1 -sS'label' -p17 -I1 -sS'badExamples' -p18 -(lsS'level' -p19 -I1 -sS'parent' -p20 -NsS'name' -p21 -S'1' -sba(irdkit.ML.DecTree.DecTree -DecTreeNode -(dp22 -g3 -(lsg10 -F0 -sg11 -(lsg12 -(lp23 -(lp24 -I0 -aI0 -aI1 -aI2 -aasg15 -(lsg16 -I1 -sg17 -I2 -sg18 -(lsg19 -I1 -sg20 -Nsg21 -S'2' -sbasg10 -F1 -sg11 -(lsg12 -(lp25 -g14 -ag24 -asg15 -(lsg16 -I0 -sg17 -I2 -sg18 -(lsg19 -I1 -sg20 -Nsg21 -S'Var: 2' -sba(irdkit.ML.DecTree.DecTree -DecTreeNode -(dp26 -g3 -(lsg10 -F0 -sg11 -(lsg12 -(lp27 -(lp28 -I1 -aI0 -aI0 -aI2 -aa(lp29 -I1 -aI0 -aI1 -aI2 -aasg15 -(lsg16 -I1 -sg17 -I2 -sg18 -(lsg19 -I1 -sg20 -Nsg21 -S'2' -sbasg10 -F0.81127812445913283 -sg11 -(lsg12 -(lp30 -g14 -ag24 -ag28 -ag29 -asg15 -(lsg16 -I0 -sg17 -I0 -sg18 -(lsg19 -I1 -sg20 -Nsg21 -S'Var: 0' -sba(irdkit.ML.DecTree.DecTree -DecTreeNode -(dp31 -g3 -(lp32 -(irdkit.ML.DecTree.DecTree -DecTreeNode -(dp33 -g3 -(lp34 -(irdkit.ML.DecTree.DecTree -DecTreeNode -(dp35 -g3 -(lsg10 -F0 -sg11 -(lsg12 -(lp36 -(lp37 -I0 -aI1 -aI0 -aI0 -aasg15 -(lsg16 -I1 -sg17 -I0 -sg18 -(lsg19 -I1 -sg20 -Nsg21 -S'0' -sba(irdkit.ML.DecTree.DecTree -DecTreeNode -(dp38 -g3 -(lsg10 -F0 -sg11 -(lsg12 -(lp39 -(lp40 -I0 -aI1 -aI1 -aI2 -aasg15 -(lsg16 -I1 -sg17 -I2 -sg18 -(lsg19 -I1 -sg20 -Nsg21 -S'2' -sbasg10 -F1 -sg11 -(lsg12 -(lp41 -g37 -ag40 -asg15 -(lsg16 -I0 -sg17 -I2 -sg18 -(lsg19 -I1 -sg20 -Nsg21 -S'Var: 2' -sba(irdkit.ML.DecTree.DecTree -DecTreeNode -(dp42 -g3 -(lp43 -(irdkit.ML.DecTree.DecTree -DecTreeNode -(dp44 -g3 -(lsg10 -F0 -sg11 -(lsg12 -(lp45 -(lp46 -I1 -aI1 -aI0 -aI2 -aasg15 -(lsg16 -I1 -sg17 -I2 -sg18 -(lsg19 -I1 -sg20 -Nsg21 -S'2' -sba(irdkit.ML.DecTree.DecTree -DecTreeNode -(dp47 -g3 -(lsg10 -F0 -sg11 -(lsg12 -(lp48 -(lp49 -I1 -aI1 -aI1 -aI0 -aasg15 -(lsg16 -I1 -sg17 -I0 -sg18 -(lsg19 -I1 -sg20 -Nsg21 -S'0' -sbasg10 -F1 -sg11 -(lsg12 -(lp50 -g46 -ag49 -asg15 -(lsg16 -I0 -sg17 -I2 -sg18 -(lsg19 -I1 -sg20 -Nsg21 -S'Var: 2' -sbasg10 -F1 -sg11 -(lsg12 -(lp51 -g37 -ag40 -ag46 -ag49 -asg15 -(lsg16 -I0 -sg17 -I0 -sg18 -(lsg19 -I1 -sg20 -Nsg21 -S'Var: 0' -sbasg10 -F1.2987949406953985 -sg11 -(lsg12 -(lp52 -g37 -ag14 -ag24 -ag40 -ag28 -ag29 -ag46 -ag49 -asg15 -(lsg16 -I0 -sg17 -I1 -sg18 -(lsg19 -I0 -sg20 -Nsg21 -S'Var: 1' -sb. diff --git a/rdkit/ML/DecTree/test_data/QuantTree1.pkl b/rdkit/ML/DecTree/test_data/QuantTree1.pkl deleted file mode 100644 index 1f6ce762276..00000000000 --- a/rdkit/ML/DecTree/test_data/QuantTree1.pkl +++ /dev/null @@ -1,380 +0,0 @@ -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -p1 -(dp2 -S'children' -p3 -(lp4 -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp5 -g3 -(lp6 -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp7 -g3 -(lp8 -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp9 -g3 -(lsS'data' -p10 -I-666 -sS'trainingExamples' -p11 -(lsS'examples' -p12 -(lp13 -(lp14 -S'p2' -p15 -aI0 -aI0 -aF0.10000000000000001 -aI1 -aasS'testExamples' -p16 -(lsS'terminalNode' -p17 -I1 -sS'label' -p18 -I1 -sS'badExamples' -p19 -(lsS'qBounds' -p20 -(lsS'level' -p21 -I1 -sS'parent' -p22 -NsS'name' -p23 -S'1' -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp24 -g3 -(lsg10 -I-666 -sg11 -(lsg12 -(lp25 -(lp26 -S'p3' -p27 -aI0 -aI0 -aF1.1000000000000001 -aI2 -aasg16 -(lsg17 -I1 -sg18 -I2 -sg19 -(lsg20 -(lsg21 -I1 -sg22 -Nsg23 -S'2' -sbasg10 -I-666 -sg11 -(lsg12 -(lp28 -g14 -ag26 -asg16 -(lsg17 -I0 -sg18 -I3 -sg19 -(lsg20 -(lp29 -F0.60000000000000009 -asg21 -I1 -sg22 -Nsg23 -S'Var: 3' -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp30 -g3 -(lsg10 -I-666 -sg11 -(lsg12 -(lp31 -(lp32 -S'p5' -p33 -aI1 -aI0 -aF0.10000000000000001 -aI2 -aa(lp34 -S'p6' -p35 -aI1 -aI0 -aF1.1000000000000001 -aI2 -aasg16 -(lsg17 -I1 -sg18 -I2 -sg19 -(lsg20 -(lsg21 -I1 -sg22 -Nsg23 -S'2' -sbasg10 -I-666 -sg11 -(lsg12 -(lp36 -g14 -ag26 -ag32 -ag34 -asg16 -(lsg17 -I0 -sg18 -I1 -sg19 -(lsg20 -(lsg21 -I1 -sg22 -Nsg23 -S'Var: 1' -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp37 -g3 -(lp38 -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp39 -g3 -(lp40 -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp41 -g3 -(lsg10 -I-666 -sg11 -(lsg12 -(lp42 -(lp43 -S'p1' -p44 -aI0 -aI1 -aF0.10000000000000001 -aI0 -aasg16 -(lsg17 -I1 -sg18 -I0 -sg19 -(lsg20 -(lsg21 -I1 -sg22 -Nsg23 -S'0' -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp45 -g3 -(lsg10 -I-666 -sg11 -(lsg12 -(lp46 -(lp47 -S'p4' -p48 -aI0 -aI1 -aF1.1000000000000001 -aI2 -aasg16 -(lsg17 -I1 -sg18 -I2 -sg19 -(lsg20 -(lsg21 -I1 -sg22 -Nsg23 -S'2' -sbasg10 -I-666 -sg11 -(lsg12 -(lp49 -g43 -ag47 -asg16 -(lsg17 -I0 -sg18 -I3 -sg19 -(lsg20 -(lp50 -F0.60000000000000009 -asg21 -I1 -sg22 -Nsg23 -S'Var: 3' -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp51 -g3 -(lp52 -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp53 -g3 -(lsg10 -I-666 -sg11 -(lsg12 -(lp54 -(lp55 -S'p7' -p56 -aI1 -aI1 -aF0.10000000000000001 -aI2 -aasg16 -(lsg17 -I1 -sg18 -I2 -sg19 -(lsg20 -(lsg21 -I1 -sg22 -Nsg23 -S'2' -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp57 -g3 -(lsg10 -I-666 -sg11 -(lsg12 -(lp58 -(lp59 -S'p8' -p60 -aI1 -aI1 -aF1.1000000000000001 -aI0 -aasg16 -(lsg17 -I1 -sg18 -I0 -sg19 -(lsg20 -(lsg21 -I1 -sg22 -Nsg23 -S'0' -sbasg10 -I-666 -sg11 -(lsg12 -(lp61 -g55 -ag59 -asg16 -(lsg17 -I0 -sg18 -I3 -sg19 -(lsg20 -(lp62 -F0.60000000000000009 -asg21 -I1 -sg22 -Nsg23 -S'Var: 3' -sbasg10 -I-666 -sg11 -(lsg12 -(lp63 -g43 -ag47 -ag55 -ag59 -asg16 -(lsg17 -I0 -sg18 -I1 -sg19 -(lsg20 -(lsg21 -I1 -sg22 -Nsg23 -S'Var: 1' -sbasg10 -F0.393155878465832 -sg11 -(lsg12 -(lp64 -g43 -ag14 -ag26 -ag47 -ag32 -ag34 -ag55 -ag59 -asg16 -(lsg17 -I0 -sg18 -I2 -sg19 -(lsg20 -(lsg21 -I0 -sg22 -Nsg23 -S'Var: 2' -sS'_nResultCodes' -p65 -I3 -sb. diff --git a/rdkit/ML/DecTree/test_data/QuantTree2.pkl b/rdkit/ML/DecTree/test_data/QuantTree2.pkl deleted file mode 100644 index 511a2b8acf4..00000000000 --- a/rdkit/ML/DecTree/test_data/QuantTree2.pkl +++ /dev/null @@ -1,384 +0,0 @@ -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -p1 -(dp2 -S'children' -p3 -(lp4 -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp5 -g3 -(lp6 -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp7 -g3 -(lp8 -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp9 -g3 -(lsS'data' -p10 -I-666 -sS'trainingExamples' -p11 -(lsS'examples' -p12 -(lp13 -(lp14 -S'p2' -p15 -aF0.10000000000000001 -aI0 -aF0.10000000000000001 -aI1 -aasS'testExamples' -p16 -(lsS'terminalNode' -p17 -I1 -sS'label' -p18 -I1 -sS'badExamples' -p19 -(lsS'qBounds' -p20 -(lsS'level' -p21 -I1 -sS'parent' -p22 -NsS'name' -p23 -S'1' -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp24 -g3 -(lsg10 -I-666 -sg11 -(lsg12 -(lp25 -(lp26 -S'p3' -p27 -aF0.10000000000000001 -aI0 -aF1.1000000000000001 -aI2 -aasg16 -(lsg17 -I1 -sg18 -I2 -sg19 -(lsg20 -(lsg21 -I1 -sg22 -Nsg23 -S'2' -sbasg10 -I-666 -sg11 -(lsg12 -(lp28 -g14 -ag26 -asg16 -(lsg17 -I0 -sg18 -I3 -sg19 -(lsg20 -(lp29 -F0.60000000000000009 -asg21 -I1 -sg22 -Nsg23 -S'Var: 3' -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp30 -g3 -(lsg10 -I-666 -sg11 -(lsg12 -(lp31 -(lp32 -S'p5' -p33 -aF1.1000000000000001 -aI0 -aF0.10000000000000001 -aI2 -aa(lp34 -S'p6' -p35 -aF1.1000000000000001 -aI0 -aF1.1000000000000001 -aI2 -aasg16 -(lsg17 -I1 -sg18 -I2 -sg19 -(lsg20 -(lsg21 -I1 -sg22 -Nsg23 -S'2' -sbasg10 -I-666 -sg11 -(lsg12 -(lp36 -g14 -ag26 -ag32 -ag34 -asg16 -(lsg17 -I0 -sg18 -I1 -sg19 -(lsg20 -(lp37 -F0.60000000000000009 -asg21 -I1 -sg22 -Nsg23 -S'Var: 1' -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp38 -g3 -(lp39 -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp40 -g3 -(lp41 -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp42 -g3 -(lsg10 -I-666 -sg11 -(lsg12 -(lp43 -(lp44 -S'p1' -p45 -aF0.10000000000000001 -aI1 -aF0.10000000000000001 -aI0 -aasg16 -(lsg17 -I1 -sg18 -I0 -sg19 -(lsg20 -(lsg21 -I1 -sg22 -Nsg23 -S'0' -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp46 -g3 -(lsg10 -I-666 -sg11 -(lsg12 -(lp47 -(lp48 -S'p4' -p49 -aF0.10000000000000001 -aI1 -aF1.1000000000000001 -aI2 -aasg16 -(lsg17 -I1 -sg18 -I2 -sg19 -(lsg20 -(lsg21 -I1 -sg22 -Nsg23 -S'2' -sbasg10 -I-666 -sg11 -(lsg12 -(lp50 -g44 -ag48 -asg16 -(lsg17 -I0 -sg18 -I3 -sg19 -(lsg20 -(lp51 -F0.60000000000000009 -asg21 -I1 -sg22 -Nsg23 -S'Var: 3' -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp52 -g3 -(lp53 -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp54 -g3 -(lsg10 -I-666 -sg11 -(lsg12 -(lp55 -(lp56 -S'p7' -p57 -aF1.1000000000000001 -aI1 -aF0.10000000000000001 -aI2 -aasg16 -(lsg17 -I1 -sg18 -I2 -sg19 -(lsg20 -(lsg21 -I1 -sg22 -Nsg23 -S'2' -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp58 -g3 -(lsg10 -I-666 -sg11 -(lsg12 -(lp59 -(lp60 -S'p8' -p61 -aF1.1000000000000001 -aI1 -aF1.1000000000000001 -aI0 -aasg16 -(lsg17 -I1 -sg18 -I0 -sg19 -(lsg20 -(lsg21 -I1 -sg22 -Nsg23 -S'0' -sbasg10 -I-666 -sg11 -(lsg12 -(lp62 -g56 -ag60 -asg16 -(lsg17 -I0 -sg18 -I3 -sg19 -(lsg20 -(lp63 -F0.60000000000000009 -asg21 -I1 -sg22 -Nsg23 -S'Var: 3' -sbasg10 -I-666 -sg11 -(lsg12 -(lp64 -g44 -ag48 -ag56 -ag60 -asg16 -(lsg17 -I0 -sg18 -I1 -sg19 -(lsg20 -(lp65 -F0.60000000000000009 -asg21 -I1 -sg22 -Nsg23 -S'Var: 1' -sbasg10 -F0.393155878465832 -sg11 -(lsg12 -(lp66 -g44 -ag14 -ag26 -ag48 -ag32 -ag34 -ag56 -ag60 -asg16 -(lsg17 -I0 -sg18 -I2 -sg19 -(lsg20 -(lsg21 -I0 -sg22 -Nsg23 -S'Var: 2' -sS'_nResultCodes' -p67 -I3 -sb. diff --git a/rdkit/ML/DecTree/test_data/XValTree.pkl b/rdkit/ML/DecTree/test_data/XValTree.pkl deleted file mode 100644 index cb9c12cfe31..00000000000 --- a/rdkit/ML/DecTree/test_data/XValTree.pkl +++ /dev/null @@ -1,3650 +0,0 @@ -(irdkit.ML.DecTree.DecTree -DecTreeNode -p0 -(dp1 -S'_nResultCodes' -p2 -I2 -sS'name' -p3 -S'Var: 2' -p4 -sS'parent' -p5 -NsS'level' -p6 -I0 -sS'badExamples' -p7 -(lp8 -(lp9 -I01 -aI01 -aI00 -aI01 -aI00 -aI01 -aI01 -aI00 -aI01 -aI00 -aI00 -aa(lp10 -I01 -aI01 -aI01 -aI00 -aI00 -aI00 -aI00 -aI01 -aI00 -aI00 -aI00 -aa(lp11 -I00 -aI01 -aI01 -aI01 -aI00 -aI01 -aI00 -aI00 -aI01 -aI01 -aI00 -aa(lp12 -I01 -aI01 -aI00 -aI00 -aI01 -aI01 -aI00 -aI01 -aI00 -aI01 -aI01 -aa(lp13 -I00 -aI01 -aI01 -aI01 -aI00 -aI01 -aI01 -aI00 -aI00 -aI00 -aI00 -aa(lp14 -I00 -aI01 -aI01 -aI01 -aI01 -aI00 -aI01 -aI01 -aI01 -aI01 -aI01 -aa(lp15 -I01 -aI01 -aI00 -aI01 -aI01 -aI00 -aI01 -aI00 -aI00 -aI01 -aI01 -aasS'label' -p16 -I2 -sS'terminalNode' -p17 -I0 -sS'trainingExamples' -p18 -(lp19 -(lp20 -I00 -aI00 -aI00 -aI00 -aI01 -aI00 -aI01 -aI01 -aI00 -aI00 -aI00 -aa(lp21 -I01 -aI01 -aI01 -aI01 -aI01 -aI00 -aI01 -aI00 -aI01 -aI01 -aI01 -aa(lp22 -I00 -aI00 -aI00 -aI01 -aI01 -aI00 -aI01 -aI00 -aI00 -aI00 -aI00 -aa(lp23 -I01 -aI00 -aI00 -aI00 -aI00 -aI00 -aI01 -aI01 -aI00 -aI00 -aI00 -aa(lp24 -I01 -aI01 -aI01 -aI01 -aI00 -aI01 -aI00 -aI00 -aI00 -aI01 -aI01 -aa(lp25 -I00 -aI00 -aI00 -aI00 -aI01 -aI01 -aI00 -aI00 -aI01 -aI00 -aI00 -aa(lp26 -I01 -aI01 -aI00 -aI00 -aI01 -aI00 -aI00 -aI01 -aI01 -aI01 -aI00 -aa(lp27 -I01 -aI01 -aI00 -aI01 -aI00 -aI00 -aI01 -aI00 -aI00 -aI01 -aI00 -aa(lp28 -I01 -aI01 -aI01 -aI01 -aI00 -aI01 -aI00 -aI01 -aI01 -aI00 -aI01 -aa(lp29 -I00 -aI01 -aI00 -aI01 -aI00 -aI00 -aI00 -aI00 -aI00 -aI01 -aI00 -aa(lp30 -I01 -aI00 -aI00 -aI00 -aI01 -aI00 -aI01 -aI00 -aI01 -aI00 -aI00 -aa(lp31 -I00 -aI01 -aI00 -aI00 -aI01 -aI01 -aI00 -aI01 -aI01 -aI00 -aI00 -aa(lp32 -I00 -aI01 -aI00 -aI01 -aI00 -aI01 -aI01 -aI01 -aI01 -aI01 -aI00 -aa(lp33 -I00 -aI00 -aI01 -aI01 -aI00 -aI00 -aI01 -aI01 -aI01 -aI00 -aI00 -aa(lp34 -I00 -aI00 -aI00 -aI00 -aI01 -aI01 -aI00 -aI00 -aI00 -aI01 -aI00 -aa(lp35 -I00 -aI00 -aI01 -aI01 -aI01 -aI01 -aI01 -aI00 -aI01 -aI01 -aI00 -aa(lp36 -I01 -aI01 -aI01 -aI01 -aI00 -aI00 -aI01 -aI00 -aI01 -aI01 -aI01 -aa(lp37 -I01 -aI00 -aI00 -aI00 -aI00 -aI01 -aI00 -aI01 -aI01 -aI01 -aI00 -aa(lp38 -I01 -aI00 -aI01 -aI01 -aI01 -aI00 -aI01 -aI01 -aI01 -aI00 -aI01 -aa(lp39 -I01 -aI00 -aI01 -aI01 -aI01 -aI01 -aI00 -aI01 -aI01 -aI00 -aI00 -aa(lp40 -I01 -aI00 -aI00 -aI00 -aI01 -aI00 -aI01 -aI00 -aI01 -aI01 -aI00 -aa(lp41 -I01 -aI01 -aI01 -aI01 -aI00 -aI01 -aI01 -aI01 -aI01 -aI01 -aI01 -aa(lp42 -I00 -aI01 -aI00 -aI00 -aI01 -aI01 -aI01 -aI00 -aI00 -aI00 -aI00 -aa(lp43 -I00 -aI01 -aI01 -aI01 -aI00 -aI00 -aI01 -aI00 -aI01 -aI00 -aI00 -aa(lp44 -I01 -aI00 -aI00 -aI00 -aI00 -aI01 -aI00 -aI01 -aI00 -aI01 -aI00 -aa(lp45 -I00 -aI01 -aI01 -aI00 -aI01 -aI01 -aI01 -aI00 -aI01 -aI01 -aI01 -aa(lp46 -I00 -aI00 -aI01 -aI00 -aI01 -aI00 -aI01 -aI00 -aI00 -aI01 -aI00 -aa(lp47 -I01 -aI00 -aI01 -aI01 -aI01 -aI01 -aI01 -aI00 -aI01 -aI01 -aI01 -aa(lp48 -I00 -aI00 -aI00 -aI00 -aI01 -aI00 -aI00 -aI01 -aI00 -aI01 -aI00 -aa(lp49 -I01 -aI00 -aI01 -aI01 -aI01 -aI01 -aI01 -aI01 -aI00 -aI01 -aI01 -aa(lp50 -I01 -aI01 -aI00 -aI00 -aI01 -aI01 -aI00 -aI00 -aI01 -aI01 -aI01 -aa(lp51 -I00 -aI01 -aI01 -aI01 -aI01 -aI00 -aI01 -aI00 -aI01 -aI00 -aI00 -aa(lp52 -I01 -aI00 -aI01 -aI00 -aI00 -aI01 -aI00 -aI00 -aI00 -aI00 -aI00 -aa(lp53 -I01 -aI01 -aI01 -aI01 -aI01 -aI01 -aI00 -aI00 -aI01 -aI01 -aI01 -aa(lp54 -I01 -aI00 -aI01 -aI01 -aI00 -aI01 -aI00 -aI00 -aI01 -aI00 -aI00 -aa(lp55 -I00 -aI00 -aI01 -aI00 -aI00 -aI00 -aI00 -aI01 -aI01 -aI00 -aI00 -aa(lp56 -I01 -aI00 -aI01 -aI00 -aI01 -aI01 -aI00 -aI00 -aI00 -aI01 -aI00 -aa(lp57 -I01 -aI01 -aI01 -aI00 -aI01 -aI00 -aI00 -aI01 -aI01 -aI01 -aI01 -aa(lp58 -I00 -aI01 -aI01 -aI01 -aI01 -aI00 -aI01 -aI00 -aI01 -aI00 -aI00 -aa(lp59 -I01 -aI01 -aI00 -aI00 -aI00 -aI01 -aI01 -aI00 -aI00 -aI01 -aI00 -aa(lp60 -I00 -aI00 -aI00 -aI00 -aI01 -aI00 -aI01 -aI00 -aI01 -aI01 -aI00 -aa(lp61 -I01 -aI00 -aI01 -aI00 -aI01 -aI01 -aI00 -aI01 -aI00 -aI00 -aI00 -aa(lp62 -I01 -aI00 -aI01 -aI01 -aI01 -aI00 -aI00 -aI01 -aI01 -aI01 -aI00 -aa(lp63 -I00 -aI01 -aI01 -aI00 -aI01 -aI00 -aI00 -aI00 -aI00 -aI01 -aI00 -aa(lp64 -I00 -aI01 -aI01 -aI01 -aI00 -aI01 -aI01 -aI01 -aI01 -aI01 -aI01 -aa(lp65 -I01 -aI00 -aI01 -aI00 -aI01 -aI00 -aI01 -aI00 -aI01 -aI01 -aI01 -aa(lp66 -I01 -aI00 -aI01 -aI01 -aI01 -aI01 -aI00 -aI00 -aI01 -aI00 -aI00 -aa(lp67 -I01 -aI01 -aI01 -aI01 -aI00 -aI00 -aI01 -aI00 -aI01 -aI00 -aI01 -aa(lp68 -I01 -aI01 -aI00 -aI01 -aI01 -aI00 -aI00 -aI01 -aI00 -aI00 -aI00 -aa(lp69 -I00 -aI00 -aI00 -aI00 -aI00 -aI00 -aI00 -aI00 -aI00 -aI00 -aI00 -aa(lp70 -I01 -aI00 -aI00 -aI01 -aI01 -aI00 -aI01 -aI01 -aI00 -aI01 -aI00 -aa(lp71 -I00 -aI00 -aI00 -aI00 -aI00 -aI01 -aI01 -aI00 -aI01 -aI00 -aI00 -aa(lp72 -I01 -aI00 -aI01 -aI00 -aI01 -aI00 -aI00 -aI00 -aI01 -aI01 -aI00 -aa(lp73 -I00 -aI00 -aI00 -aI00 -aI00 -aI00 -aI00 -aI00 -aI01 -aI01 -aI00 -aa(lp74 -I00 -aI01 -aI00 -aI01 -aI01 -aI00 -aI00 -aI01 -aI00 -aI01 -aI00 -aa(lp75 -I01 -aI01 -aI01 -aI00 -aI01 -aI00 -aI01 -aI00 -aI01 -aI00 -aI01 -aa(lp76 -I01 -aI00 -aI00 -aI01 -aI00 -aI00 -aI01 -aI01 -aI00 -aI01 -aI00 -aa(lp77 -I00 -aI01 -aI00 -aI00 -aI01 -aI01 -aI01 -aI01 -aI01 -aI00 -aI00 -aa(lp78 -I01 -aI00 -aI00 -aI00 -aI01 -aI01 -aI01 -aI01 -aI01 -aI00 -aI00 -aa(lp79 -I01 -aI01 -aI00 -aI01 -aI01 -aI00 -aI01 -aI00 -aI00 -aI00 -aI00 -aa(lp80 -I00 -aI00 -aI00 -aI01 -aI01 -aI01 -aI00 -aI00 -aI00 -aI00 -aI00 -aa(lp81 -I00 -aI01 -aI00 -aI01 -aI00 -aI01 -aI01 -aI01 -aI01 -aI00 -aI00 -aa(lp82 -I00 -aI01 -aI01 -aI00 -aI00 -aI00 -aI01 -aI01 -aI00 -aI00 -aI00 -aa(lp83 -I01 -aI00 -aI00 -aI01 -aI01 -aI00 -aI00 -aI00 -aI01 -aI00 -aI00 -aa(lp84 -I00 -aI01 -aI00 -aI00 -aI00 -aI00 -aI01 -aI01 -aI00 -aI00 -aI00 -aa(lp85 -I00 -aI01 -aI00 -aI01 -aI00 -aI00 -aI01 -aI01 -aI00 -aI00 -aI00 -aa(lp86 -I01 -aI01 -aI01 -aI01 -aI01 -aI01 -aI00 -aI00 -aI00 -aI01 -aI01 -aa(lp87 -I01 -aI01 -aI00 -aI01 -aI01 -aI00 -aI00 -aI01 -aI00 -aI00 -aI00 -aa(lp88 -I01 -aI01 -aI01 -aI01 -aI00 -aI01 -aI00 -aI00 -aI00 -aI01 -aI01 -aa(lp89 -I00 -aI00 -aI01 -aI00 -aI01 -aI00 -aI01 -aI01 -aI00 -aI01 -aI00 -aa(lp90 -I01 -aI00 -aI00 -aI01 -aI01 -aI01 -aI01 -aI00 -aI00 -aI00 -aI00 -aa(lp91 -I00 -aI01 -aI00 -aI00 -aI01 -aI01 -aI00 -aI00 -aI01 -aI00 -aI00 -aa(lp92 -I01 -aI00 -aI01 -aI00 -aI01 -aI01 -aI01 -aI01 -aI01 -aI01 -aI01 -aa(lp93 -I00 -aI00 -aI01 -aI01 -aI01 -aI00 -aI00 -aI01 -aI00 -aI00 -aI00 -aa(lp94 -I00 -aI00 -aI00 -aI00 -aI00 -aI01 -aI00 -aI01 -aI01 -aI01 -aI00 -aa(lp95 -I01 -aI01 -aI01 -aI00 -aI00 -aI01 -aI00 -aI01 -aI01 -aI01 -aI01 -aa(lp96 -I01 -aI00 -aI01 -aI01 -aI00 -aI01 -aI00 -aI00 -aI01 -aI01 -aI00 -aa(lp97 -I01 -aI01 -aI00 -aI00 -aI00 -aI01 -aI00 -aI00 -aI00 -aI00 -aI00 -aa(lp98 -I01 -aI00 -aI01 -aI00 -aI00 -aI00 -aI01 -aI01 -aI01 -aI01 -aI00 -aa(lp99 -I00 -aI00 -aI00 -aI00 -aI01 -aI01 -aI01 -aI00 -aI01 -aI00 -aI00 -aa(lp100 -I00 -aI00 -aI00 -aI01 -aI00 -aI01 -aI01 -aI01 -aI00 -aI01 -aI00 -aa(lp101 -I00 -aI01 -aI00 -aI01 -aI00 -aI01 -aI00 -aI00 -aI00 -aI01 -aI00 -aa(lp102 -I01 -aI00 -aI01 -aI01 -aI00 -aI01 -aI00 -aI00 -aI00 -aI00 -aI00 -aa(lp103 -I01 -aI00 -aI00 -aI00 -aI00 -aI01 -aI00 -aI01 -aI00 -aI00 -aI00 -aa(lp104 -I00 -aI01 -aI00 -aI00 -aI01 -aI01 -aI00 -aI01 -aI00 -aI01 -aI00 -aa(lp105 -I00 -aI01 -aI01 -aI00 -aI00 -aI00 -aI00 -aI01 -aI01 -aI00 -aI00 -aa(lp106 -I00 -aI00 -aI01 -aI00 -aI00 -aI00 -aI00 -aI00 -aI01 -aI01 -aI00 -aa(lp107 -I01 -aI01 -aI01 -aI00 -aI01 -aI00 -aI00 -aI01 -aI01 -aI01 -aI01 -aa(lp108 -I01 -aI01 -aI00 -aI00 -aI00 -aI01 -aI01 -aI01 -aI01 -aI00 -aI00 -aa(lp109 -I01 -aI01 -aI01 -aI01 -aI00 -aI00 -aI00 -aI01 -aI01 -aI00 -aI00 -aa(lp110 -I00 -aI00 -aI00 -aI01 -aI01 -aI00 -aI01 -aI00 -aI00 -aI00 -aI00 -aa(lp111 -I00 -aI00 -aI01 -aI00 -aI01 -aI00 -aI00 -aI01 -aI00 -aI00 -aI00 -aa(lp112 -I00 -aI00 -aI00 -aI00 -aI01 -aI01 -aI00 -aI01 -aI01 -aI00 -aI00 -aa(lp113 -I00 -aI01 -aI00 -aI01 -aI01 -aI01 -aI00 -aI01 -aI01 -aI00 -aI00 -aa(lp114 -I00 -aI00 -aI01 -aI01 -aI00 -aI00 -aI00 -aI01 -aI01 -aI01 -aI00 -aa(lp115 -I01 -aI00 -aI01 -aI01 -aI01 -aI01 -aI01 -aI00 -aI01 -aI01 -aI01 -aa(lp116 -I00 -aI00 -aI00 -aI01 -aI01 -aI00 -aI00 -aI00 -aI00 -aI01 -aI00 -aa(lp117 -I01 -aI00 -aI00 -aI00 -aI01 -aI01 -aI00 -aI00 -aI00 -aI00 -aI00 -aa(lp118 -I00 -aI00 -aI00 -aI01 -aI00 -aI00 -aI01 -aI00 -aI00 -aI01 -aI00 -aa(lp119 -I01 -aI01 -aI01 -aI00 -aI00 -aI00 -aI00 -aI00 -aI01 -aI01 -aI01 -aa(lp120 -I01 -aI01 -aI01 -aI00 -aI00 -aI00 -aI00 -aI01 -aI00 -aI01 -aI01 -aa(lp121 -I00 -aI00 -aI01 -aI00 -aI00 -aI00 -aI00 -aI00 -aI01 -aI01 -aI00 -aa(lp122 -I00 -aI01 -aI00 -aI00 -aI00 -aI01 -aI00 -aI00 -aI00 -aI01 -aI00 -aa(lp123 -I00 -aI00 -aI00 -aI00 -aI01 -aI00 -aI00 -aI01 -aI00 -aI01 -aI00 -aa(lp124 -I00 -aI00 -aI00 -aI01 -aI00 -aI01 -aI01 -aI01 -aI00 -aI00 -aI00 -aa(lp125 -I00 -aI01 -aI01 -aI01 -aI00 -aI00 -aI01 -aI00 -aI01 -aI01 -aI00 -aa(lp126 -I00 -aI01 -aI00 -aI01 -aI01 -aI01 -aI00 -aI01 -aI00 -aI01 -aI00 -aa(lp127 -I00 -aI00 -aI00 -aI00 -aI01 -aI01 -aI01 -aI00 -aI01 -aI00 -aI00 -aa(lp128 -I01 -aI01 -aI01 -aI00 -aI00 -aI00 -aI01 -aI01 -aI00 -aI00 -aI01 -aa(lp129 -I01 -aI01 -aI01 -aI00 -aI01 -aI00 -aI00 -aI01 -aI01 -aI01 -aI01 -aa(lp130 -I00 -aI01 -aI00 -aI01 -aI00 -aI00 -aI00 -aI01 -aI01 -aI00 -aI00 -aa(lp131 -I00 -aI01 -aI00 -aI01 -aI00 -aI01 -aI01 -aI01 -aI00 -aI01 -aI00 -aa(lp132 -I01 -aI00 -aI01 -aI00 -aI01 -aI01 -aI00 -aI01 -aI00 -aI00 -aI00 -aa(lp133 -I00 -aI01 -aI00 -aI00 -aI01 -aI00 -aI01 -aI00 -aI01 -aI00 -aI00 -aa(lp134 -I00 -aI01 -aI00 -aI01 -aI01 -aI01 -aI00 -aI00 -aI01 -aI01 -aI00 -aa(lp135 -I00 -aI01 -aI01 -aI00 -aI01 -aI01 -aI01 -aI01 -aI01 -aI00 -aI01 -aa(lp136 -I00 -aI00 -aI00 -aI00 -aI00 -aI01 -aI01 -aI01 -aI01 -aI01 -aI00 -aa(lp137 -I00 -aI00 -aI00 -aI00 -aI01 -aI01 -aI01 -aI01 -aI00 -aI01 -aI00 -aa(lp138 -I00 -aI01 -aI00 -aI00 -aI01 -aI01 -aI00 -aI00 -aI00 -aI00 -aI00 -aa(lp139 -I00 -aI00 -aI01 -aI01 -aI00 -aI01 -aI00 -aI00 -aI00 -aI00 -aI00 -aa(lp140 -I01 -aI00 -aI01 -aI01 -aI01 -aI01 -aI01 -aI01 -aI01 -aI01 -aI01 -aa(lp141 -I00 -aI01 -aI00 -aI01 -aI00 -aI01 -aI01 -aI01 -aI01 -aI00 -aI00 -aa(lp142 -I01 -aI00 -aI00 -aI01 -aI00 -aI00 -aI00 -aI01 -aI00 -aI00 -aI00 -aa(lp143 -I01 -aI00 -aI00 -aI01 -aI00 -aI01 -aI00 -aI01 -aI00 -aI01 -aI00 -aa(lp144 -I00 -aI00 -aI01 -aI01 -aI01 -aI00 -aI01 -aI01 -aI00 -aI01 -aI00 -aa(lp145 -I00 -aI01 -aI00 -aI01 -aI01 -aI01 -aI00 -aI00 -aI00 -aI01 -aI00 -aa(lp146 -I00 -aI00 -aI01 -aI00 -aI01 -aI00 -aI00 -aI01 -aI00 -aI00 -aI00 -aa(lp147 -I00 -aI00 -aI01 -aI00 -aI00 -aI01 -aI00 -aI00 -aI00 -aI01 -aI00 -aa(lp148 -I01 -aI00 -aI00 -aI01 -aI01 -aI01 -aI01 -aI00 -aI00 -aI00 -aI00 -aa(lp149 -I00 -aI00 -aI00 -aI01 -aI01 -aI00 -aI01 -aI01 -aI00 -aI01 -aI00 -aa(lp150 -I00 -aI00 -aI01 -aI00 -aI01 -aI00 -aI01 -aI00 -aI01 -aI00 -aI00 -aa(lp151 -I01 -aI01 -aI00 -aI01 -aI00 -aI01 -aI00 -aI01 -aI00 -aI00 -aI00 -aa(lp152 -I01 -aI01 -aI01 -aI00 -aI00 -aI01 -aI00 -aI01 -aI01 -aI00 -aI01 -aa(lp153 -I01 -aI00 -aI01 -aI01 -aI01 -aI01 -aI00 -aI00 -aI01 -aI00 -aI00 -aa(lp154 -I01 -aI01 -aI00 -aI00 -aI01 -aI01 -aI00 -aI00 -aI01 -aI01 -aI01 -aa(lp155 -I01 -aI00 -aI00 -aI01 -aI00 -aI00 -aI00 -aI00 -aI00 -aI00 -aI00 -aa(lp156 -I01 -aI00 -aI00 -aI01 -aI00 -aI01 -aI00 -aI00 -aI00 -aI01 -aI00 -aasS'examples' -p157 -(lp158 -(lp159 -I00 -aI00 -aI01 -aI01 -aI00 -aI00 -aI01 -aI01 -aI01 -aI00 -aI00 -aa(lp160 -I01 -aI01 -aI01 -aI01 -aI01 -aI01 -aI01 -aI00 -aI01 -aI00 -aI01 -aa(lp161 -I01 -aI00 -aI01 -aI01 -aI01 -aI00 -aI00 -aI00 -aI01 -aI01 -aI00 -aag9 -a(lp162 -I00 -aI01 -aI00 -aI01 -aI00 -aI00 -aI00 -aI00 -aI01 -aI00 -aI00 -aag10 -a(lp163 -I01 -aI01 -aI00 -aI01 -aI00 -aI00 -aI00 -aI01 -aI00 -aI01 -aI00 -aa(lp164 -I01 -aI00 -aI00 -aI00 -aI01 -aI00 -aI00 -aI00 -aI01 -aI01 -aI00 -aa(lp165 -I01 -aI00 -aI00 -aI01 -aI01 -aI01 -aI00 -aI01 -aI01 -aI01 -aI00 -aa(lp166 -I01 -aI01 -aI00 -aI00 -aI01 -aI00 -aI00 -aI00 -aI00 -aI00 -aI00 -aa(lp167 -I01 -aI00 -aI00 -aI01 -aI00 -aI00 -aI00 -aI01 -aI01 -aI00 -aI00 -aa(lp168 -I01 -aI01 -aI00 -aI01 -aI00 -aI00 -aI01 -aI01 -aI01 -aI01 -aI00 -aa(lp169 -I01 -aI01 -aI01 -aI01 -aI00 -aI01 -aI00 -aI01 -aI01 -aI01 -aI01 -aa(lp170 -I00 -aI01 -aI00 -aI00 -aI01 -aI01 -aI01 -aI00 -aI01 -aI01 -aI00 -aa(lp171 -I00 -aI00 -aI01 -aI01 -aI01 -aI00 -aI01 -aI01 -aI01 -aI01 -aI00 -aa(lp172 -I01 -aI00 -aI00 -aI01 -aI00 -aI00 -aI01 -aI01 -aI01 -aI01 -aI00 -aa(lp173 -I01 -aI01 -aI01 -aI01 -aI01 -aI00 -aI00 -aI01 -aI01 -aI01 -aI01 -aa(lp174 -I01 -aI01 -aI01 -aI01 -aI00 -aI01 -aI01 -aI00 -aI00 -aI00 -aI01 -aa(lp175 -I00 -aI00 -aI00 -aI00 -aI00 -aI00 -aI01 -aI01 -aI01 -aI01 -aI00 -aa(lp176 -I01 -aI00 -aI01 -aI01 -aI01 -aI00 -aI00 -aI00 -aI01 -aI00 -aI00 -aa(lp177 -I01 -aI01 -aI01 -aI01 -aI01 -aI01 -aI01 -aI00 -aI00 -aI01 -aI01 -aa(lp178 -I01 -aI00 -aI00 -aI00 -aI00 -aI00 -aI01 -aI01 -aI00 -aI01 -aI00 -aa(lp179 -I00 -aI00 -aI01 -aI00 -aI00 -aI00 -aI00 -aI00 -aI01 -aI00 -aI00 -aa(lp180 -I01 -aI00 -aI01 -aI00 -aI01 -aI00 -aI00 -aI00 -aI00 -aI00 -aI00 -aa(lp181 -I00 -aI00 -aI01 -aI01 -aI00 -aI01 -aI01 -aI00 -aI00 -aI01 -aI00 -aa(lp182 -I00 -aI01 -aI01 -aI01 -aI00 -aI00 -aI00 -aI01 -aI00 -aI01 -aI00 -aa(lp183 -I01 -aI00 -aI01 -aI01 -aI01 -aI00 -aI00 -aI01 -aI00 -aI00 -aI00 -aa(lp184 -I01 -aI00 -aI01 -aI01 -aI01 -aI01 -aI00 -aI01 -aI00 -aI00 -aI00 -aa(lp185 -I01 -aI00 -aI00 -aI00 -aI00 -aI01 -aI01 -aI01 -aI01 -aI00 -aI00 -aa(lp186 -I00 -aI01 -aI00 -aI00 -aI01 -aI00 -aI00 -aI01 -aI01 -aI00 -aI00 -aa(lp187 -I00 -aI01 -aI00 -aI01 -aI00 -aI00 -aI01 -aI00 -aI00 -aI01 -aI00 -aa(lp188 -I00 -aI00 -aI00 -aI00 -aI00 -aI01 -aI00 -aI00 -aI00 -aI01 -aI00 -aa(lp189 -I01 -aI00 -aI01 -aI01 -aI01 -aI01 -aI01 -aI00 -aI00 -aI00 -aI01 -aag11 -a(lp190 -I00 -aI01 -aI00 -aI00 -aI00 -aI01 -aI01 -aI00 -aI01 -aI01 -aI00 -aa(lp191 -I00 -aI00 -aI00 -aI00 -aI00 -aI00 -aI00 -aI00 -aI00 -aI00 -aI00 -aa(lp192 -I00 -aI00 -aI00 -aI01 -aI00 -aI00 -aI00 -aI01 -aI01 -aI00 -aI00 -aa(lp193 -I01 -aI00 -aI00 -aI00 -aI00 -aI00 -aI00 -aI01 -aI00 -aI01 -aI00 -aa(lp194 -I00 -aI00 -aI01 -aI01 -aI01 -aI00 -aI00 -aI01 -aI00 -aI01 -aI00 -aa(lp195 -I01 -aI01 -aI00 -aI01 -aI00 -aI00 -aI00 -aI00 -aI00 -aI01 -aI00 -aa(lp196 -I00 -aI00 -aI01 -aI01 -aI00 -aI01 -aI01 -aI01 -aI01 -aI01 -aI00 -aa(lp197 -I00 -aI00 -aI00 -aI00 -aI01 -aI01 -aI00 -aI00 -aI00 -aI00 -aI00 -aa(lp198 -I00 -aI01 -aI01 -aI01 -aI01 -aI00 -aI00 -aI01 -aI01 -aI00 -aI00 -aa(lp199 -I00 -aI00 -aI01 -aI00 -aI01 -aI00 -aI01 -aI00 -aI00 -aI00 -aI00 -aa(lp200 -I01 -aI00 -aI00 -aI01 -aI01 -aI00 -aI01 -aI00 -aI00 -aI01 -aI00 -aa(lp201 -I00 -aI01 -aI00 -aI01 -aI00 -aI01 -aI01 -aI00 -aI01 -aI00 -aI00 -aa(lp202 -I01 -aI00 -aI00 -aI00 -aI01 -aI01 -aI01 -aI00 -aI01 -aI00 -aI00 -aag12 -a(lp203 -I00 -aI00 -aI01 -aI01 -aI00 -aI01 -aI00 -aI01 -aI00 -aI00 -aI00 -aag13 -a(lp204 -I00 -aI00 -aI01 -aI01 -aI00 -aI01 -aI01 -aI01 -aI01 -aI01 -aI00 -aa(lp205 -I00 -aI00 -aI00 -aI00 -aI01 -aI01 -aI00 -aI00 -aI00 -aI01 -aI00 -aag14 -a(lp206 -I01 -aI01 -aI01 -aI00 -aI01 -aI00 -aI00 -aI01 -aI00 -aI01 -aI01 -aa(lp207 -I01 -aI00 -aI01 -aI00 -aI01 -aI00 -aI01 -aI00 -aI01 -aI01 -aI01 -aa(lp208 -I00 -aI01 -aI00 -aI01 -aI00 -aI00 -aI01 -aI00 -aI00 -aI01 -aI00 -aag15 -a(lp209 -I01 -aI00 -aI01 -aI01 -aI01 -aI01 -aI01 -aI01 -aI01 -aI01 -aI01 -aa(lp210 -I00 -aI01 -aI00 -aI01 -aI01 -aI00 -aI01 -aI01 -aI01 -aI00 -aI00 -aa(lp211 -I01 -aI00 -aI00 -aI01 -aI01 -aI01 -aI00 -aI00 -aI00 -aI01 -aI00 -aa(lp212 -I01 -aI01 -aI01 -aI01 -aI00 -aI00 -aI00 -aI01 -aI01 -aI00 -aI00 -aa(lp213 -I01 -aI01 -aI01 -aI00 -aI00 -aI01 -aI00 -aI01 -aI01 -aI01 -aI01 -aa(lp214 -I01 -aI01 -aI00 -aI00 -aI01 -aI01 -aI00 -aI00 -aI00 -aI00 -aI00 -aasS'_trainIndices' -p215 -(lp216 -I0 -aI1 -aI2 -aI3 -aI5 -aI7 -aI8 -aI11 -aI13 -aI15 -aI16 -aI18 -aI19 -aI22 -aI23 -aI24 -aI26 -aI27 -aI29 -aI31 -aI32 -aI36 -aI37 -aI39 -aI40 -aI41 -aI45 -aI46 -aI48 -aI49 -aI51 -aI52 -aI55 -aI59 -aI60 -aI62 -aI63 -aI66 -aI67 -aI68 -aI70 -aI71 -aI73 -aI76 -aI78 -aI79 -aI81 -aI84 -aI85 -aI86 -aI87 -aI88 -aI89 -aI90 -aI92 -aI93 -aI94 -aI95 -aI96 -aI97 -aI98 -aI99 -aI105 -aI106 -aI107 -aI110 -aI112 -aI114 -aI115 -aI116 -aI117 -aI119 -aI124 -aI125 -aI126 -aI127 -aI128 -aI129 -aI130 -aI132 -aI133 -aI135 -aI136 -aI137 -aI138 -aI139 -aI140 -aI141 -aI142 -aI143 -aI144 -aI146 -aI147 -aI148 -aI150 -aI151 -aI152 -aI153 -aI155 -aI156 -aI157 -aI158 -aI159 -aI160 -aI163 -aI164 -aI165 -aI166 -aI167 -aI168 -aI172 -aI173 -aI174 -aI175 -aI176 -aI177 -aI178 -aI179 -aI180 -aI181 -aI182 -aI183 -aI184 -aI185 -aI186 -aI187 -aI188 -aI190 -aI191 -aI192 -aI193 -aI194 -aI195 -aI196 -aI197 -aI198 -aI199 -asS'data' -p217 -F0.7582971529373276 -sS'children' -p218 -(lp219 -(irdkit.ML.DecTree.DecTree -DecTreeNode -p220 -(dp221 -g3 -S'Var: 0' -p222 -sg5 -Nsg6 -I1 -sg7 -(lp223 -sg16 -I0 -sg17 -I0 -sg18 -(lp224 -sg157 -(lp225 -g9 -ag162 -ag163 -ag164 -ag165 -ag166 -ag167 -ag168 -ag170 -ag172 -ag175 -ag178 -ag185 -ag186 -ag187 -ag188 -ag190 -ag191 -ag192 -ag193 -ag195 -ag197 -ag200 -ag201 -ag202 -ag12 -ag205 -ag208 -ag15 -ag210 -ag211 -ag214 -asg217 -F0.18116640155354566 -sg218 -(lp226 -(irdkit.ML.DecTree.DecTree -DecTreeNode -p227 -(dp228 -g3 -S'0' -p229 -sg5 -Nsg6 -I1 -sg7 -(lp230 -sg16 -I0 -sg17 -I1 -sg18 -(lp231 -sg157 -(lp232 -g162 -ag170 -ag175 -ag186 -ag187 -ag188 -ag190 -ag191 -ag192 -ag197 -ag201 -ag205 -ag208 -ag210 -asg217 -F0.0 -sg218 -(lp233 -sS'testExamples' -p234 -(lp235 -sba(irdkit.ML.DecTree.DecTree -DecTreeNode -p236 -(dp237 -g3 -S'Var: 8' -p238 -sg5 -Nsg6 -I1 -sg7 -(lp239 -sg16 -I8 -sg17 -I0 -sg18 -(lp240 -sg157 -(lp241 -g9 -ag163 -ag164 -ag165 -ag166 -ag167 -ag168 -ag172 -ag178 -ag185 -ag193 -ag195 -ag200 -ag202 -ag12 -ag15 -ag211 -ag214 -asg217 -F0.3712323266408757 -sg218 -(lp242 -(irdkit.ML.DecTree.DecTree -DecTreeNode -p243 -(dp244 -g3 -S'0' -p245 -sg5 -Nsg6 -I1 -sg7 -(lp246 -sg16 -I0 -sg17 -I1 -sg18 -(lp247 -sg157 -(lp248 -g163 -ag166 -ag178 -ag193 -ag195 -ag200 -ag12 -ag15 -ag211 -ag214 -asg217 -F0.0 -sg218 -(lp249 -sg234 -(lp250 -sba(irdkit.ML.DecTree.DecTree -DecTreeNode -p251 -(dp252 -g3 -S'Var: 1' -p253 -sg5 -Nsg6 -I1 -sg7 -(lp254 -sg16 -I1 -sg17 -I0 -sg18 -(lp255 -sg157 -(lp256 -g9 -ag164 -ag165 -ag167 -ag168 -ag172 -ag185 -ag202 -asg217 -F0.7642045065086203 -sg218 -(lp257 -(irdkit.ML.DecTree.DecTree -DecTreeNode -p258 -(dp259 -g3 -S'0' -p260 -sg5 -Nsg6 -I1 -sg7 -(lp261 -sg16 -I0 -sg17 -I1 -sg18 -(lp262 -sg157 -(lp263 -g164 -ag165 -ag167 -ag172 -ag185 -ag202 -asg217 -F0.0 -sg218 -(lp264 -sg234 -(lp265 -sba(irdkit.ML.DecTree.DecTree -DecTreeNode -p266 -(dp267 -g3 -S'Var: 7' -p268 -sg5 -Nsg6 -I1 -sg7 -(lp269 -sg16 -I7 -sg17 -I0 -sg18 -(lp270 -sg157 -(lp271 -g9 -ag168 -asg217 -F1.0 -sg218 -(lp272 -(irdkit.ML.DecTree.DecTree -DecTreeNode -p273 -(dp274 -g3 -S'1' -p275 -sg5 -Nsg6 -I1 -sg7 -(lp276 -sg16 -I1 -sg17 -I1 -sg18 -(lp277 -sg157 -(lp278 -g9 -asg217 -F0.0 -sg218 -(lp279 -sg234 -(lp280 -sba(irdkit.ML.DecTree.DecTree -DecTreeNode -p281 -(dp282 -g3 -S'0' -p283 -sg5 -Nsg6 -I1 -sg7 -(lp284 -sg16 -I0 -sg17 -I1 -sg18 -(lp285 -sg157 -(lp286 -g168 -asg217 -F0.0 -sg218 -(lp287 -sg234 -(lp288 -sbasg234 -(lp289 -sbasg234 -(lp290 -sbasg234 -(lp291 -sbasg234 -(lp292 -sba(irdkit.ML.DecTree.DecTree -DecTreeNode -p293 -(dp294 -g3 -S'Var: 1' -p295 -sg5 -Nsg6 -I1 -sg7 -(lp296 -sg16 -I1 -sg17 -I0 -sg18 -(lp297 -sg157 -(lp298 -g159 -ag160 -ag161 -ag10 -ag169 -ag171 -ag173 -ag174 -ag176 -ag177 -ag179 -ag180 -ag181 -ag182 -ag183 -ag184 -ag189 -ag11 -ag194 -ag196 -ag198 -ag199 -ag203 -ag13 -ag204 -ag14 -ag206 -ag207 -ag209 -ag212 -ag213 -asg217 -F0.9886994082884974 -sg218 -(lp299 -(irdkit.ML.DecTree.DecTree -DecTreeNode -p300 -(dp301 -g3 -S'Var: 6' -p302 -sg5 -Nsg6 -I1 -sg7 -(lp303 -sg16 -I6 -sg17 -I0 -sg18 -(lp304 -sg157 -(lp305 -g159 -ag161 -ag171 -ag176 -ag179 -ag180 -ag181 -ag183 -ag184 -ag189 -ag194 -ag196 -ag199 -ag203 -ag204 -ag207 -ag209 -asg217 -F0.7219280948873623 -sg218 -(lp306 -(irdkit.ML.DecTree.DecTree -DecTreeNode -p307 -(dp308 -g3 -S'0' -p309 -sg5 -Nsg6 -I1 -sg7 -(lp310 -sg16 -I0 -sg17 -I1 -sg18 -(lp311 -sg157 -(lp312 -g161 -ag176 -ag179 -ag180 -ag183 -ag184 -ag194 -ag203 -asg217 -F0.0 -sg218 -(lp313 -sg234 -(lp314 -sba(irdkit.ML.DecTree.DecTree -DecTreeNode -p315 -(dp316 -g3 -S'Var: 0' -p317 -sg5 -Nsg6 -I1 -sg7 -(lp318 -sg16 -I0 -sg17 -I0 -sg18 -(lp319 -sg157 -(lp320 -g159 -ag171 -ag181 -ag189 -ag196 -ag199 -ag204 -ag207 -ag209 -asg217 -F1.0 -sg218 -(lp321 -(irdkit.ML.DecTree.DecTree -DecTreeNode -p322 -(dp323 -g3 -S'0' -p324 -sg5 -Nsg6 -I1 -sg7 -(lp325 -sg16 -I0 -sg17 -I1 -sg18 -(lp326 -sg157 -(lp327 -g159 -ag171 -ag181 -ag196 -ag199 -ag204 -asg217 -F0.0 -sg218 -(lp328 -sg234 -(lp329 -sba(irdkit.ML.DecTree.DecTree -DecTreeNode -p330 -(dp331 -g3 -S'Var: 4' -p332 -sg5 -Nsg6 -I1 -sg7 -(lp333 -sg16 -I4 -sg17 -I0 -sg18 -(lp334 -sg157 -(lp335 -g189 -ag207 -ag209 -asg217 -F0.5435644431995964 -sg218 -(lp336 -(irdkit.ML.DecTree.DecTree -DecTreeNode -p337 -(dp338 -g3 -S'0' -p339 -sg5 -Nsg6 -I1 -sg7 -(lp340 -sg16 -I0 -sg17 -I1 -sg18 -(lp341 -sg157 -(lp342 -sg217 -F0.0 -sg218 -(lp343 -sg234 -(lp344 -sba(irdkit.ML.DecTree.DecTree -DecTreeNode -p345 -(dp346 -g3 -S'1' -p347 -sg5 -Nsg6 -I1 -sg7 -(lp348 -sg16 -I1 -sg17 -I1 -sg18 -(lp349 -sg157 -(lp350 -g189 -ag207 -ag209 -asg217 -F0.0 -sg218 -(lp351 -sg234 -(lp352 -sbasg234 -(lp353 -sbasg234 -(lp354 -sbasg234 -(lp355 -sba(irdkit.ML.DecTree.DecTree -DecTreeNode -p356 -(dp357 -g3 -S'Var: 0' -p358 -sg5 -Nsg6 -I1 -sg7 -(lp359 -sg16 -I0 -sg17 -I0 -sg18 -(lp360 -sg157 -(lp361 -g160 -ag10 -ag169 -ag173 -ag174 -ag177 -ag182 -ag11 -ag198 -ag13 -ag14 -ag206 -ag212 -ag213 -asg217 -F0.8497511372532974 -sg218 -(lp362 -(irdkit.ML.DecTree.DecTree -DecTreeNode -p363 -(dp364 -g3 -S'Var: 5' -p365 -sg5 -Nsg6 -I1 -sg7 -(lp366 -sg16 -I5 -sg17 -I0 -sg18 -(lp367 -sg157 -(lp368 -g182 -ag11 -ag198 -ag13 -ag14 -asg217 -F0.8812908992306927 -sg218 -(lp369 -(irdkit.ML.DecTree.DecTree -DecTreeNode -p370 -(dp371 -g3 -S'0' -p372 -sg5 -Nsg6 -I1 -sg7 -(lp373 -sg16 -I0 -sg17 -I1 -sg18 -(lp374 -sg157 -(lp375 -g182 -ag198 -ag14 -asg217 -F0.0 -sg218 -(lp376 -sg234 -(lp377 -sba(irdkit.ML.DecTree.DecTree -DecTreeNode -p378 -(dp379 -g3 -S'1' -p380 -sg5 -Nsg6 -I1 -sg7 -(lp381 -sg16 -I1 -sg17 -I1 -sg18 -(lp382 -sg157 -(lp383 -g11 -ag13 -asg217 -F0.0 -sg218 -(lp384 -sg234 -(lp385 -sbasg234 -(lp386 -sba(irdkit.ML.DecTree.DecTree -DecTreeNode -p387 -(dp388 -g3 -S'Var: 9' -p389 -sg5 -Nsg6 -I1 -sg7 -(lp390 -sg16 -I9 -sg17 -I0 -sg18 -(lp391 -sg157 -(lp392 -g160 -ag10 -ag169 -ag173 -ag174 -ag177 -ag206 -ag212 -ag213 -asg217 -F0.2974722489192897 -sg218 -(lp393 -(irdkit.ML.DecTree.DecTree -DecTreeNode -p394 -(dp395 -g3 -S'Var: 3' -p396 -sg5 -Nsg6 -I1 -sg7 -(lp397 -sg16 -I3 -sg17 -I0 -sg18 -(lp398 -sg157 -(lp399 -g160 -ag10 -ag174 -ag212 -asg217 -F0.6500224216483541 -sg218 -(lp400 -(irdkit.ML.DecTree.DecTree -DecTreeNode -p401 -(dp402 -g3 -S'1' -p403 -sg5 -Nsg6 -I1 -sg7 -(lp404 -sg16 -I1 -sg17 -I1 -sg18 -(lp405 -sg157 -(lp406 -g10 -asg217 -F0.0 -sg218 -(lp407 -sg234 -(lp408 -sba(irdkit.ML.DecTree.DecTree -DecTreeNode -p409 -(dp410 -g3 -S'Var: 5' -p411 -sg5 -Nsg6 -I1 -sg7 -(lp412 -sg16 -I5 -sg17 -I0 -sg18 -(lp413 -sg157 -(lp414 -g160 -ag174 -ag212 -asg217 -F0.9182958340544894 -sg218 -(lp415 -(irdkit.ML.DecTree.DecTree -DecTreeNode -p416 -(dp417 -g3 -S'Var: 6' -p418 -sg5 -Nsg6 -I1 -sg7 -(lp419 -sg16 -I6 -sg17 -I0 -sg18 -(lp420 -sg157 -(lp421 -g212 -asg217 -F1.0 -sg218 -(lp422 -(irdkit.ML.DecTree.DecTree -DecTreeNode -p423 -(dp424 -g3 -S'0' -p425 -sg5 -Nsg6 -I1 -sg7 -(lp426 -sg16 -I0 -sg17 -I1 -sg18 -(lp427 -sg157 -(lp428 -g212 -asg217 -F0.0 -sg218 -(lp429 -sg234 -(lp430 -sba(irdkit.ML.DecTree.DecTree -DecTreeNode -p431 -(dp432 -g3 -S'1' -p433 -sg5 -Nsg6 -I1 -sg7 -(lp434 -sg16 -I1 -sg17 -I1 -sg18 -(lp435 -sg157 -(lp436 -sg217 -F0.0 -sg218 -(lp437 -sg234 -(lp438 -sbasg234 -(lp439 -sba(irdkit.ML.DecTree.DecTree -DecTreeNode -p440 -(dp441 -g3 -S'1' -p442 -sg5 -Nsg6 -I1 -sg7 -(lp443 -sg16 -I1 -sg17 -I1 -sg18 -(lp444 -sg157 -(lp445 -g160 -ag174 -asg217 -F0.0 -sg218 -(lp446 -sg234 -(lp447 -sbasg234 -(lp448 -sbasg234 -(lp449 -sba(irdkit.ML.DecTree.DecTree -DecTreeNode -p450 -(dp451 -g3 -S'1' -p452 -sg5 -Nsg6 -I1 -sg7 -(lp453 -sg16 -I1 -sg17 -I1 -sg18 -(lp454 -sg157 -(lp455 -g169 -ag173 -ag177 -ag206 -ag213 -asg217 -F0.0 -sg218 -(lp456 -sg234 -(lp457 -sbasg234 -(lp458 -sbasg234 -(lp459 -sbasg234 -(lp460 -sbasg234 -(lp461 -g159 -ag160 -ag161 -ag9 -ag162 -ag10 -ag163 -ag164 -ag165 -ag166 -ag167 -ag168 -ag169 -ag170 -ag171 -ag172 -ag173 -ag174 -ag175 -ag176 -ag177 -ag178 -ag179 -ag180 -ag181 -ag182 -ag183 -ag184 -ag185 -ag186 -ag187 -ag188 -ag189 -ag11 -ag190 -ag191 -ag192 -ag193 -ag194 -ag195 -ag196 -ag197 -ag198 -ag199 -ag200 -ag201 -ag202 -ag12 -ag203 -ag13 -ag204 -ag205 -ag14 -ag206 -ag207 -ag208 -ag15 -ag209 -ag210 -ag211 -ag212 -ag213 -ag214 -asb. \ No newline at end of file diff --git a/rdkit/ML/DecTree/test_data/cdk2-few.pkl.gz b/rdkit/ML/DecTree/test_data/cdk2-few.pkl.gz deleted file mode 100644 index 79f403739a5..00000000000 Binary files a/rdkit/ML/DecTree/test_data/cdk2-few.pkl.gz and /dev/null differ diff --git a/rdkit/ML/EnrichPlot.py b/rdkit/ML/EnrichPlot.py deleted file mode 100755 index 6b423a8ba4b..00000000000 --- a/rdkit/ML/EnrichPlot.py +++ /dev/null @@ -1,483 +0,0 @@ -# $Id$ -# -# Copyright (C) 2002-2006 greg Landrum and Rational Discovery LLC -# -# @@ All Rights Reserved @@ -# This file is part of the RDKit. -# The contents are covered by the terms of the BSD license -# which is included in the file license.txt, found at the root -# of the RDKit source tree. -# -"""Command line tool to construct an enrichment plot from saved composite models - -Usage: EnrichPlot [optional args] -d dbname -t tablename - -Required Arguments: - -d "dbName": the name of the database for screening - - -t "tablename": provide the name of the table with the data to be screened - - : file name(s) of pickled composite model(s). - If the -p argument is also provided (see below), this argument is ignored. - -Optional Arguments: - - -a "list": the list of result codes to be considered active. This will be - eval'ed, so be sure that it evaluates as a list or sequence of - integers. For example, -a "[1,2]" will consider activity values 1 and 2 - to be active - - - --enrich "list": identical to the -a argument above. - - - --thresh: sets a threshold for the plot. If the confidence falls below - this value, picking will be terminated - - - -H: screen only the hold out set (works only if a version of - BuildComposite more recent than 1.2.2 was used). - - - -T: screen only the training set (works only if a version of - BuildComposite more recent than 1.2.2 was used). - - - -S: shuffle activity values before screening - - - -R: randomize activity values before screening - - - -F *filter frac*: filters the data before training to change the - distribution of activity values in the training set. *filter frac* - is the fraction of the training set that should have the target value. - **See note in BuildComposite help about data filtering** - - - -v *filter value*: filters the data before training to change the - distribution of activity values in the training set. *filter value* - is the target value to use in filtering. - **See note in BuildComposite help about data filtering** - - - -p "tableName": provides the name of a db table containing the - models to be screened. If you use this argument, you should also - use the -N argument (below) to specify a note value. - - - -N "note": provides a note to be used to pull models from a db table. - - - --plotFile "filename": writes the data to an output text file (filename.dat) - and creates a gnuplot input file (filename.gnu) to plot it - - - --showPlot: causes the gnuplot plot constructed using --plotFile to be - displayed in gnuplot. - -""" -from warnings import warn - -warn('This module is deprecated and will be removed in the 2024.03 release', DeprecationWarning, - stacklevel=2) -# from rdkit.Dbase.DbConnection import DbConnect - -import pickle -import sys - -import numpy - -from rdkit import DataStructs, RDConfig -from rdkit.Dbase.DbConnection import DbConnect -from rdkit.ML import CompositeRun -from rdkit.ML.Data import DataUtils, SplitData, Stats - -__VERSION_STRING = "2.4.0" - - -def cmp(t1, t2): - return (t1 < t2) * -1 or (t1 > t2) * 1 - - -def message(msg, noRet=0, dest=sys.stderr): - """ emits messages to _sys.stderr_ - override this in modules which import this one to redirect output - - **Arguments** - - - msg: the string to be displayed - - """ - if noRet: - dest.write('%s ' % (msg)) - else: - dest.write('%s\n' % (msg)) - - -def error(msg, dest=sys.stderr): - """ emits messages to _sys.stderr_ - override this in modules which import this one to redirect output - - **Arguments** - - - msg: the string to be displayed - - """ - sys.stderr.write('ERROR: %s\n' % (msg)) - - -def ScreenModel(mdl, descs, data, picking=[1], indices=[], errorEstimate=0): - """ collects the results of screening an individual composite model that match - a particular value - - **Arguments** - - - mdl: the composite model - - - descs: a list of descriptor names corresponding to the data set - - - data: the data set, a list of points to be screened. - - - picking: (Optional) a list of values that are to be collected. - For examples, if you want an enrichment plot for picking the values - 1 and 2, you'd having picking=[1,2]. - - **Returns** - - a list of 4-tuples containing: - - - the id of the point - - - the true result (from the data set) - - - the predicted result - - - the confidence value for the prediction - - """ - mdl.SetInputOrder(descs) - - for j in range(len(mdl)): - tmp = mdl.GetModel(j) - if hasattr(tmp, '_trainIndices') and not isinstance(tmp._trainIndices, dict): - tis = {} - if hasattr(tmp, '_trainIndices'): - for v in tmp._trainIndices: - tis[v] = 1 - tmp._trainIndices = tis - - res = [] - if mdl.GetQuantBounds(): - needsQuant = 1 - else: - needsQuant = 0 - - if not indices: - indices = list(range(len(data))) - nTrueActives = 0 - for i in indices: - if errorEstimate: - use = [] - for j in range(len(mdl)): - tmp = mdl.GetModel(j) - if not tmp._trainIndices.get(i, 0): - use.append(j) - else: - use = None - pt = data[i] - pred, conf = mdl.ClassifyExample(pt, onlyModels=use) - if needsQuant: - pt = mdl.QuantizeActivity(pt[:]) - trueRes = pt[-1] - if trueRes in picking: - nTrueActives += 1 - if pred in picking: - res.append((pt[0], trueRes, pred, conf)) - return nTrueActives, res - - -def AccumulateCounts(predictions, thresh=0, sortIt=1): - """ Accumulates the data for the enrichment plot for a single model - - **Arguments** - - - predictions: a list of 3-tuples (as returned by _ScreenModels_) - - - thresh: a threshold for the confidence level. Anything below - this threshold will not be considered - - - sortIt: toggles sorting on confidence levels - - - **Returns** - - - a list of 3-tuples: - - - the id of the active picked here - - - num actives found so far - - - number of picks made so far - - """ - if sortIt: - predictions.sort(lambda x, y: cmp(y[3], x[3])) - res = [] - nCorrect = 0 - nPts = 0 - for i in range(len(predictions)): - ID, real, pred, conf = predictions[i] - if conf > thresh: - if pred == real: - nCorrect += 1 - nPts += 1 - res.append((ID, nCorrect, nPts)) - - return res - - -def MakePlot(details, final, counts, pickVects, nModels, nTrueActs=-1): - if not hasattr(details, 'plotFile') or not details.plotFile: - return - - dataFileName = '%s.dat' % (details.plotFile) - outF = open(dataFileName, 'w+') - i = 0 - while i < len(final) and counts[i] != 0: - if nModels > 1: - _, sd = Stats.MeanAndDev(pickVects[i]) - confInterval = Stats.GetConfidenceInterval(sd, len(pickVects[i]), level=90) - outF.write('%d %f %f %d %f\n' % - (i + 1, final[i][0] / counts[i], final[i][1] / counts[i], counts[i], confInterval)) - else: - outF.write('%d %f %f %d\n' % - (i + 1, final[i][0] / counts[i], final[i][1] / counts[i], counts[i])) - i += 1 - outF.close() - plotFileName = '%s.gnu' % (details.plotFile) - gnuF = open(plotFileName, 'w+') - gnuHdr = """# Generated by EnrichPlot.py version: %s - set size square 0.7 - set xr [0:] - set data styl points - set ylab 'Num Correct Picks' - set xlab 'Num Picks' - set grid - set nokey - set term postscript enh color solid "Helvetica" 16 - set term X - """ % (__VERSION_STRING) - print(gnuHdr, file=gnuF) - if nTrueActs > 0: - print('set yr [0:%d]' % nTrueActs, file=gnuF) - print('plot x with lines', file=gnuF) - if nModels > 1: - everyGap = i / 20 - print('replot "%s" using 1:2 with lines,' % (dataFileName), end='', file=gnuF) - print('"%s" every %d using 1:2:5 with yerrorbars' % (dataFileName, everyGap), file=gnuF) - else: - print('replot "%s" with points' % (dataFileName), file=gnuF) - gnuF.close() - - if hasattr(details, 'showPlot') and details.showPlot: - try: - from Gnuplot import Gnuplot - p = Gnuplot() - p('load "%s"' % (plotFileName)) - input('press return to continue...\n') - except Exception: - import traceback - traceback.print_exc() - - -def Usage(): - """ displays a usage message and exits """ - sys.stderr.write(__doc__) - sys.exit(-1) - - -if __name__ == '__main__': - import getopt - try: - args, extras = getopt.getopt(sys.argv[1:], 'd:t:a:N:p:cSTHF:v:', - ('thresh=', 'plotFile=', 'showPlot', 'pickleCol=', 'OOB', 'noSort', - 'pickBase=', 'doROC', 'rocThresh=', 'enrich=')) - except Exception: - import traceback - traceback.print_exc() - Usage() - - details = CompositeRun.CompositeRun() - CompositeRun.SetDefaults(details) - - details.activeTgt = [1] - details.doTraining = 0 - details.doHoldout = 0 - details.dbTableName = '' - details.plotFile = '' - details.showPlot = 0 - details.pickleCol = -1 - details.errorEstimate = 0 - details.sortIt = 1 - details.pickBase = '' - details.doROC = 0 - details.rocThresh = -1 - for arg, val in args: - if arg == '-d': - details.dbName = val - if arg == '-t': - details.dbTableName = val - elif arg == '-a' or arg == '--enrich': - details.activeTgt = eval(val) - if not isinstance(details.activeTgt, (tuple, list)): - # if (type(details.activeTgt) not in (types.TupleType, types.ListType)): - details.activeTgt = (details.activeTgt, ) - - elif arg == '--thresh': - details.threshold = float(val) - elif arg == '-N': - details.note = val - elif arg == '-p': - details.persistTblName = val - elif arg == '-S': - details.shuffleActivities = 1 - elif arg == '-H': - details.doTraining = 0 - details.doHoldout = 1 - elif arg == '-T': - details.doTraining = 1 - details.doHoldout = 0 - elif arg == '-F': - details.filterFrac = float(val) - elif arg == '-v': - details.filterVal = float(val) - elif arg == '--plotFile': - details.plotFile = val - elif arg == '--showPlot': - details.showPlot = 1 - elif arg == '--pickleCol': - details.pickleCol = int(val) - 1 - elif arg == '--OOB': - details.errorEstimate = 1 - elif arg == '--noSort': - details.sortIt = 0 - elif arg == '--doROC': - details.doROC = 1 - elif arg == '--rocThresh': - details.rocThresh = int(val) - elif arg == '--pickBase': - details.pickBase = val - - if not details.dbName or not details.dbTableName: - Usage() - print('*******Please provide both the -d and -t arguments') - - message('Building Data set\n') - dataSet = DataUtils.DBToData(details.dbName, details.dbTableName, user=RDConfig.defaultDBUser, - password=RDConfig.defaultDBPassword, pickleCol=details.pickleCol, - pickleClass=DataStructs.ExplicitBitVect) - - descs = dataSet.GetVarNames() - nPts = dataSet.GetNPts() - message('npts: %d\n' % (nPts)) - final = numpy.zeros((nPts, 2), float) - counts = numpy.zeros(nPts, numpy.integer) - selPts = [None] * nPts - - models = [] - if details.persistTblName: - conn = DbConnect(details.dbName, details.persistTblName) - message('-> Retrieving models from database') - curs = conn.GetCursor() - curs.execute("select model from %s where note='%s'" % (details.persistTblName, details.note)) - message('-> Reconstructing models') - try: - blob = curs.fetchone() - except Exception: - blob = None - while blob: - message(' Building model %d' % len(models)) - blob = blob[0] - try: - models.append(pickle.loads(str(blob))) - except Exception: - import traceback - traceback.print_exc() - print('Model failed') - else: - message(' <-Done') - try: - blob = curs.fetchone() - except Exception: - blob = None - curs = None - else: - for modelName in extras: - try: - model = pickle.load(open(modelName, 'rb')) - except Exception: - import traceback - print('problems with model %s:' % modelName) - traceback.print_exc() - else: - models.append(model) - nModels = len(models) - pickVects = {} - halfwayPts = [1e8] * len(models) - for whichModel, model in enumerate(models): - tmpD = dataSet - try: - seed = model._randomSeed - except AttributeError: - pass - else: - DataUtils.InitRandomNumbers(seed) - if details.shuffleActivities: - DataUtils.RandomizeActivities(tmpD, shuffle=1) - if hasattr(model, '_splitFrac') and (details.doHoldout or details.doTraining): - trainIdx, testIdx = SplitData.SplitIndices(tmpD.GetNPts(), model._splitFrac, silent=1) - if details.filterFrac != 0.0: - trainFilt, temp = DataUtils.FilterData(tmpD, details.filterVal, details.filterFrac, -1, - indicesToUse=trainIdx, indicesOnly=1) - testIdx += temp - trainIdx = trainFilt - if details.doTraining: - testIdx, trainIdx = trainIdx, testIdx - else: - testIdx = list(range(tmpD.GetNPts())) - - message('screening %d examples' % (len(testIdx))) - nTrueActives, screenRes = ScreenModel(model, descs, tmpD, picking=details.activeTgt, - indices=testIdx, errorEstimate=details.errorEstimate) - message('accumulating') - runningCounts = AccumulateCounts(screenRes, sortIt=details.sortIt, thresh=details.threshold) - if details.pickBase: - pickFile = open('%s.%d.picks' % (details.pickBase, whichModel + 1), 'w+') - else: - pickFile = None - - for i, entry in enumerate(runningCounts): - entry = runningCounts[i] - selPts[i] = entry[0] - final[i][0] += entry[1] - final[i][1] += entry[2] - v = pickVects.get(i, []) - v.append(entry[1]) - pickVects[i] = v - counts[i] += 1 - if pickFile: - pickFile.write('%s\n' % (entry[0])) - if entry[1] >= nTrueActives / 2 and entry[2] < halfwayPts[whichModel]: - halfwayPts[whichModel] = entry[2] - message('Halfway point: %d\n' % halfwayPts[whichModel]) - - if details.plotFile: - MakePlot(details, final, counts, pickVects, nModels, nTrueActs=nTrueActives) - else: - if nModels > 1: - print('#Index\tAvg_num_correct\tConf90Pct\tAvg_num_picked\tNum_picks\tlast_selection') - else: - print('#Index\tAvg_num_correct\tAvg_num_picked\tNum_picks\tlast_selection') - - i = 0 - while i < nPts and counts[i] != 0: - if nModels > 1: - mean, sd = Stats.MeanAndDev(pickVects[i]) - confInterval = Stats.GetConfidenceInterval(sd, len(pickVects[i]), level=90) - print('%d\t%f\t%f\t%f\t%d\t%s' % (i + 1, final[i][0] / counts[i], confInterval, - final[i][1] / counts[i], counts[i], str(selPts[i]))) - else: - print('%d\t%f\t%f\t%d\t%s' % - (i + 1, final[i][0] / counts[i], final[i][1] / counts[i], counts[i], str(selPts[i]))) - i += 1 - - mean, sd = Stats.MeanAndDev(halfwayPts) - print('Halfway point: %.2f(%.2f)' % (mean, sd)) diff --git a/rdkit/ML/GrowComposite.py b/rdkit/ML/GrowComposite.py deleted file mode 100755 index 122da0ec1c7..00000000000 --- a/rdkit/ML/GrowComposite.py +++ /dev/null @@ -1,585 +0,0 @@ -# $Id$ -# -# Copyright (C) 2003-2006 greg Landrum and Rational Discovery LLC -# -# @@ All Rights Reserved @@ -# This file is part of the RDKit. -# The contents are covered by the terms of the BSD license -# which is included in the file license.txt, found at the root -# of the RDKit source tree. -# -""" command line utility for growing composite models - -**Usage** - - _GrowComposite [optional args] filename_ - -**Command Line Arguments** - - - -n *count*: number of new models to build - - - -C *pickle file name*: name of file containing composite upon which to build. - - - --inNote *note*: note to be used in loading composite models from the database - for growing - - - --balTable *table name*: table from which to take the original data set - (for balancing) - - - --balWeight *weight*: (between 0 and 1) weighting factor for the new data - (for balancing). OR, *weight* can be a list of weights - - - --balCnt *count*: number of individual models in the balanced composite - (for balancing) - - - --balH: use only the holdout set from the original data set in the balancing - (for balancing) - - - --balT: use only the training set from the original data set in the balancing - (for balancing) - - - -S: shuffle the original data set - (for balancing) - - - -r: randomize the activities of the original data set - (for balancing) - - - -N *note*: note to be attached to the grown composite when it's saved in the - database - - - --outNote *note*: equivalent to -N - - - -o *filename*: name of an output file to hold the pickled composite after - it has been grown. - If multiple balance weights are used, the weights will be added to - the filenames. - - - -L *limit*: provide an (integer) limit on individual model complexity - - - -d *database name*: instead of reading the data from a QDAT file, - pull it from a database. In this case, the _filename_ argument - provides the name of the database table containing the data set. - - - -p *tablename*: store persistence data in the database - in table *tablename* - - - -l: locks the random number generator to give consistent sets - of training and hold-out data. This is primarily intended - for testing purposes. - - - -g: be less greedy when training the models. - - - -G *number*: force trees to be rooted at descriptor *number*. - - - -D: show a detailed breakdown of the composite model performance - across the training and, when appropriate, hold-out sets. - - - -t *threshold value*: use high-confidence predictions for the final - analysis of the hold-out data. - - - -q *list string*: Add QuantTrees to the composite and use the list - specified in *list string* as the number of target quantization - bounds for each descriptor. Don't forget to include 0's at the - beginning and end of *list string* for the name and value fields. - For example, if there are 4 descriptors and you want 2 quant bounds - apiece, you would use _-q "[0,2,2,2,2,0]"_. - Two special cases: - 1) If you would like to ignore a descriptor in the model building, - use '-1' for its number of quant bounds. - 2) If you have integer valued data that should not be quantized - further, enter 0 for that descriptor. - - - -V: print the version number and exit - -""" -from warnings import warn - -warn('This module is deprecated and will be removed in the 2024.03 release', DeprecationWarning, - stacklevel=2) - -import pickle -import sys -import time - -import numpy - -from rdkit.Dbase.DbConnection import DbConnect -from rdkit.ML import BuildComposite, CompositeRun, ScreenComposite -from rdkit.ML.Composite import AdjustComposite -from rdkit.ML.Data import DataUtils, SplitData - -_runDetails = CompositeRun.CompositeRun() - -__VERSION_STRING = "0.5.0" - -_verbose = 1 - - -def message(msg): - """ emits messages to _sys.stdout_ - override this in modules which import this one to redirect output - - **Arguments** - - - msg: the string to be displayed - - """ - if _verbose: - sys.stdout.write('%s\n' % (msg)) - - -def GrowIt(details, composite, progressCallback=None, saveIt=1, setDescNames=0, data=None): - """ does the actual work of building a composite model - - **Arguments** - - - details: a _CompositeRun.CompositeRun_ object containing details - (options, parameters, etc.) about the run - - - composite: the composite model to grow - - - progressCallback: (optional) a function which is called with a single - argument (the number of models built so far) after each model is built. - - - saveIt: (optional) if this is nonzero, the resulting model will be pickled - and dumped to the filename specified in _details.outName_ - - - setDescNames: (optional) if nonzero, the composite's _SetInputOrder()_ method - will be called using the results of the data set's _GetVarNames()_ method; - it is assumed that the details object has a _descNames attribute which - is passed to the composites _SetDescriptorNames()_ method. Otherwise - (the default), _SetDescriptorNames()_ gets the results of _GetVarNames()_. - - - data: (optional) the data set to be used. If this is not provided, the - data set described in details will be used. - - **Returns** - - the enlarged composite model - - - """ - details.rundate = time.asctime() - - if data is None: - fName = details.tableName.strip() - if details.outName == '': - details.outName = fName + '.pkl' - if details.dbName == '': - data = DataUtils.BuildQuantDataSet(fName) - elif details.qBounds != []: - details.tableName = fName - data = details.GetDataSet() - else: - data = DataUtils.DBToQuantData( # Function no longer defined - details.dbName, fName, quantName=details.qTableName, user=details.dbUser, - password=details.dbPassword) - - seed = composite._randomSeed - DataUtils.InitRandomNumbers(seed) - if details.shuffleActivities == 1: - DataUtils.RandomizeActivities(data, shuffle=1, runDetails=details) - elif details.randomActivities == 1: - DataUtils.RandomizeActivities(data, shuffle=0, runDetails=details) - - namedExamples = data.GetNamedData() - trainExamples = namedExamples - nExamples = len(trainExamples) - message('Training with %d examples' % (nExamples)) - message('\t%d descriptors' % (len(trainExamples[0]) - 2)) - nVars = data.GetNVars() - nPossibleVals = composite.nPossibleVals - attrs = list(range(1, nVars + 1)) - - if details.useTrees: - from rdkit.ML.DecTree import CrossValidate, PruneTree - if details.qBounds != []: - from rdkit.ML.DecTree import BuildQuantTree - builder = BuildQuantTree.QuantTreeBoot - else: - from rdkit.ML.DecTree import ID3 - builder = ID3.ID3Boot - driver = CrossValidate.CrossValidationDriver - pruner = PruneTree.PruneTree - - if setDescNames: - composite.SetInputOrder(data.GetVarNames()) - composite.Grow(trainExamples, attrs, [0] + nPossibleVals, buildDriver=driver, pruner=pruner, - nTries=details.nModels, pruneIt=details.pruneIt, lessGreedy=details.lessGreedy, - needsQuantization=0, treeBuilder=builder, nQuantBounds=details.qBounds, - startAt=details.startAt, maxDepth=details.limitDepth, - progressCallback=progressCallback, silent=not _verbose) - - else: - from rdkit.ML.Neural import CrossValidate - driver = CrossValidate.CrossValidationDriver - composite.Grow(trainExamples, attrs, [0] + nPossibleVals, nTries=details.nModels, - buildDriver=driver, needsQuantization=0) - - composite.AverageErrors() - composite.SortModels() - modelList, counts, avgErrs = composite.GetAllData() - counts = numpy.array(counts) - avgErrs = numpy.array(avgErrs) - composite._varNames = data.GetVarNames() - - for i in range(len(modelList)): - modelList[i].NameModel(composite._varNames) - - # do final statistics - weightedErrs = counts * avgErrs - averageErr = sum(weightedErrs) / sum(counts) - devs = (avgErrs - averageErr) - devs = devs * counts - devs = numpy.sqrt(devs * devs) - avgDev = sum(devs) / sum(counts) - if _verbose: - message('# Overall Average Error: %%% 5.2f, Average Deviation: %%% 6.2f' % - (100. * averageErr, 100. * avgDev)) - - if details.bayesModel: - composite.Train(trainExamples, verbose=0) - - badExamples = [] - if not details.detailedRes: - if _verbose: - message('Testing all examples') - wrong = BuildComposite.testall(composite, namedExamples, badExamples) - if _verbose: - message('%d examples (%% %5.2f) were misclassified' % - (len(wrong), 100. * float(len(wrong)) / float(len(namedExamples)))) - _runDetails.overall_error = float(len(wrong)) / len(namedExamples) - - if details.detailedRes: - if _verbose: - message('\nEntire data set:') - resTup = ScreenComposite.ShowVoteResults(range(data.GetNPts()), data, composite, - nPossibleVals[-1], details.threshold) - nGood, nBad, _, avgGood, avgBad, _, voteTab = resTup - nPts = len(namedExamples) - nClass = nGood + nBad - _runDetails.overall_error = float(nBad) / nClass - _runDetails.overall_correct_conf = avgGood - _runDetails.overall_incorrect_conf = avgBad - _runDetails.overall_result_matrix = repr(voteTab) - nRej = nClass - nPts - if nRej > 0: - _runDetails.overall_fraction_dropped = float(nRej) / nPts - - return composite - - -def GetComposites(details): - res = [] - if details.persistTblName and details.inNote: - conn = DbConnect(details.dbName, details.persistTblName) - mdls = conn.GetData(fields='MODEL', where="where note='%s'" % (details.inNote)) - for row in mdls: - rawD = row[0] - res.append(pickle.loads(str(rawD))) - elif details.composFileName: - res.append(pickle.load(open(details.composFileName, 'rb'))) - return res - - -def BalanceComposite(details, composite, data1=None, data2=None): - """ balances the composite using the parameters provided in details - - **Arguments** - - - details a _CompositeRun.RunDetails_ object - - - composite: the composite model to be balanced - - - data1: (optional) if provided, this should be the - data set used to construct the original models - - - data2: (optional) if provided, this should be the - data set used to construct the new individual models - - """ - if not details.balCnt or details.balCnt > len(composite): - return composite - message("Balancing Composite") - - # - # start by getting data set 1: which is the data set used to build the - # original models - # - if data1 is None: - message("\tReading First Data Set") - fName = details.balTable.strip() - tmp = details.tableName - details.tableName = fName - dbName = details.dbName - details.dbName = details.balDb - data1 = details.GetDataSet() - details.tableName = tmp - details.dbName = dbName - if data1 is None: - return composite - details.splitFrac = composite._splitFrac - details.randomSeed = composite._randomSeed - DataUtils.InitRandomNumbers(details.randomSeed) - if details.shuffleActivities == 1: - DataUtils.RandomizeActivities(data1, shuffle=1, runDetails=details) - elif details.randomActivities == 1: - DataUtils.RandomizeActivities(data1, shuffle=0, runDetails=details) - namedExamples = data1.GetNamedData() - if details.balDoHoldout or details.balDoTrain: - trainIdx, testIdx = SplitData.SplitIndices(len(namedExamples), details.splitFrac, silent=1) - trainExamples = [namedExamples[x] for x in trainIdx] - testExamples = [namedExamples[x] for x in testIdx] - if details.filterFrac != 0.0: - trainIdx, temp = DataUtils.FilterData(trainExamples, details.filterVal, details.filterFrac, - -1, indicesOnly=1) - tmp = [trainExamples[x] for x in trainIdx] - testExamples += [trainExamples[x] for x in temp] - trainExamples = tmp - if details.balDoHoldout: - testExamples, trainExamples = trainExamples, testExamples - else: - trainExamples = namedExamples - dataSet1 = trainExamples - cols1 = [x.upper() for x in data1.GetVarNames()] - data1 = None - - # - # now grab data set 2: the data used to build the new individual models - # - if data2 is None: - message("\tReading Second Data Set") - data2 = details.GetDataSet() - if data2 is None: - return composite - details.splitFrac = composite._splitFrac - details.randomSeed = composite._randomSeed - DataUtils.InitRandomNumbers(details.randomSeed) - if details.shuffleActivities == 1: - DataUtils.RandomizeActivities(data2, shuffle=1, runDetails=details) - elif details.randomActivities == 1: - DataUtils.RandomizeActivities(data2, shuffle=0, runDetails=details) - dataSet2 = data2.GetNamedData() - cols2 = [x.upper() for x in data2.GetVarNames()] - data2 = None - - # and balance it: - res = [] - weights = details.balWeight - if not isinstance(weights, (tuple, list)): - weights = (weights, ) - for weight in weights: - message("\tBalancing with Weight: %.4f" % (weight)) - res.append( - AdjustComposite.BalanceComposite(composite, dataSet1, dataSet2, weight, details.balCnt, - names1=cols1, names2=cols2)) - return res - - -def ShowVersion(includeArgs=0): - """ prints the version number - - """ - print('This is GrowComposite.py version %s' % (__VERSION_STRING)) - if includeArgs: - print('command line was:') - print(' '.join(sys.argv)) - - -def Usage(): - """ provides a list of arguments for when this is used from the command line - - """ - print(__doc__) - sys.exit(-1) - - -def SetDefaults(runDetails=None): - """ initializes a details object with default values - - **Arguments** - - - details: (optional) a _CompositeRun.CompositeRun_ object. - If this is not provided, the global _runDetails will be used. - - **Returns** - - the initialized _CompositeRun_ object. - - - """ - if runDetails is None: - runDetails = _runDetails - return CompositeRun.SetDefaults(runDetails) - - -def ParseArgs(runDetails): - """ parses command line arguments and updates _runDetails_ - - **Arguments** - - - runDetails: a _CompositeRun.CompositeRun_ object. - - """ - import getopt - args, extra = getopt.getopt(sys.argv[1:], 'P:o:n:p:b:sf:F:v:hlgd:rSTt:Q:q:DVG:L:C:N:', [ - 'inNote=', - 'outNote=', - 'balTable=', - 'balWeight=', - 'balCnt=', - 'balH', - 'balT', - 'balDb=', - ]) - runDetails.inNote = '' - runDetails.composFileName = '' - runDetails.balTable = '' - runDetails.balWeight = (0.5, ) - runDetails.balCnt = 0 - runDetails.balDoHoldout = 0 - runDetails.balDoTrain = 0 - runDetails.balDb = '' - for arg, val in args: - if arg == '-n': - runDetails.nModels = int(val) - elif arg == '-C': - runDetails.composFileName = val - elif arg == '--balTable': - runDetails.balTable = val - elif arg == '--balWeight': - runDetails.balWeight = eval(val) - if not isinstance(runDetails.balWeight, (tuple, list)): - runDetails.balWeight = (runDetails.balWeight, ) - elif arg == '--balCnt': - runDetails.balCnt = int(val) - elif arg == '--balH': - runDetails.balDoHoldout = 1 - elif arg == '--balT': - runDetails.balDoTrain = 1 - elif arg == '--balDb': - runDetails.balDb = val - elif arg == '--inNote': - runDetails.inNote = val - elif arg == '-N' or arg == '--outNote': - runDetails.note = val - elif arg == '-o': - runDetails.outName = val - elif arg == '-p': - runDetails.persistTblName = val - elif arg == '-r': - runDetails.randomActivities = 1 - elif arg == '-S': - runDetails.shuffleActivities = 1 - elif arg == '-h': - Usage() - elif arg == '-l': - runDetails.lockRandom = 1 - elif arg == '-g': - runDetails.lessGreedy = 1 - elif arg == '-G': - runDetails.startAt = int(val) - elif arg == '-d': - runDetails.dbName = val - elif arg == '-T': - runDetails.useTrees = 0 - elif arg == '-t': - runDetails.threshold = float(val) - elif arg == '-D': - runDetails.detailedRes = 1 - elif arg == '-L': - runDetails.limitDepth = int(val) - elif arg == '-q': - qBounds = eval(val) - assert isinstance(qBounds, - (tuple, list)), 'bad argument type for -q, specify a list as a string' - runDetails.qBoundCount = val - runDetails.qBounds = qBounds - elif arg == '-Q': - qBounds = eval(val) - assert type(qBounds) in [type([]), type( - ())], 'bad argument type for -Q, specify a list as a string' - runDetails.activityBounds = qBounds - runDetails.activityBoundsVals = val - elif arg == '-V': - ShowVersion() - sys.exit(0) - else: - print('bad argument:', arg, file=sys.stderr) - Usage() - runDetails.tableName = extra[0] - if not runDetails.balDb: - runDetails.balDb = runDetails.dbName - - -if __name__ == '__main__': - if len(sys.argv) < 2: - Usage() - - _runDetails.cmd = ' '.join(sys.argv) - SetDefaults(_runDetails) - ParseArgs(_runDetails) - - ShowVersion(includeArgs=1) - - initModels = GetComposites(_runDetails) - nModels = len(initModels) - if nModels > 1: - for i in range(nModels): - sys.stderr.write( - '---------------------------------\n\tDoing %d of %d\n---------------------------------\n' % - (i + 1, nModels)) - composite = GrowIt(_runDetails, initModels[i], setDescNames=1) - if _runDetails.balTable and _runDetails.balCnt: - composites = BalanceComposite(_runDetails, composite) - else: - composites = [composite] - for mdl in composites: - mdl.ClearModelExamples() - if _runDetails.outName: - nWeights = len(_runDetails.balWeight) - if nWeights == 1: - outName = _runDetails.outName - composites[0].Pickle(outName) - else: - for i in range(nWeights): - weight = int(100 * _runDetails.balWeight[i]) - model = composites[i] - outName = '%s.%d.pkl' % (_runDetails.outName.split('.pkl')[0], weight) - model.Pickle(outName) - if _runDetails.persistTblName and _runDetails.dbName: - message('Updating results table %s:%s' % (_runDetails.dbName, _runDetails.persistTblName)) - if (len(_runDetails.balWeight)) > 1: - message('WARNING: updating results table with models having different weights') - # save the composite - for i in range(len(composites)): - _runDetails.model = pickle.dumps(composites[i]) - _runDetails.Store(db=_runDetails.dbName, table=_runDetails.persistTblName) - elif nModels == 1: - composite = GrowIt(_runDetails, initModels[0], setDescNames=1) - if _runDetails.balTable and _runDetails.balCnt: - composites = BalanceComposite(_runDetails, composite) - else: - composites = [composite] - for mdl in composites: - mdl.ClearModelExamples() - if _runDetails.outName: - nWeights = len(_runDetails.balWeight) - if nWeights == 1: - outName = _runDetails.outName - composites[0].Pickle(outName) - else: - for i in range(nWeights): - weight = int(100 * _runDetails.balWeight[i]) - model = composites[i] - outName = '%s.%d.pkl' % (_runDetails.outName.split('.pkl')[0], weight) - model.Pickle(outName) - if _runDetails.persistTblName and _runDetails.dbName: - message('Updating results table %s:%s' % (_runDetails.dbName, _runDetails.persistTblName)) - if (len(composites)) > 1: - message('WARNING: updating results table with models having different weights') - for i in range(len(composites)): - _runDetails.model = pickle.dumps(composites[i]) - _runDetails.Store(db=_runDetails.dbName, table=_runDetails.persistTblName) - else: - message("No models found") diff --git a/rdkit/ML/KNN/CrossValidate.py b/rdkit/ML/KNN/CrossValidate.py deleted file mode 100644 index 723f5da6da2..00000000000 --- a/rdkit/ML/KNN/CrossValidate.py +++ /dev/null @@ -1,119 +0,0 @@ -# -# Copyright (C) 2000-2008 greg Landrum -# -""" handles doing cross validation with k-nearest neighbors model - -and evaluation of individual models - -""" - -from rdkit.ML.Data import SplitData -from rdkit.ML.KNN import DistFunctions -from rdkit.ML.KNN.KNNClassificationModel import KNNClassificationModel -from rdkit.ML.KNN.KNNRegressionModel import KNNRegressionModel - - -def makeClassificationModel(numNeigh, attrs, distFunc): - return KNNClassificationModel(numNeigh, attrs, distFunc) - - -def makeRegressionModel(numNeigh, attrs, distFunc): - return KNNRegressionModel(numNeigh, attrs, distFunc) - - -def CrossValidate(knnMod, testExamples, appendExamples=0): - """ - Determines the classification error for the testExamples - - **Arguments** - - - tree: a decision tree (or anything supporting a _ClassifyExample()_ method) - - - testExamples: a list of examples to be used for testing - - - appendExamples: a toggle which is passed along to the tree as it does - the classification. The trees can use this to store the examples they - classify locally. - - **Returns** - - a 2-tuple consisting of: - """ - nTest = len(testExamples) - - if isinstance(knnMod, KNNClassificationModel): - badExamples = [] - nBad = 0 - for i in range(nTest): - testEx = testExamples[i] - trueRes = testEx[-1] - res = knnMod.ClassifyExample(testEx, appendExamples) - if (trueRes != res): - badExamples.append(testEx) - nBad += 1 - return float(nBad) / nTest, badExamples - elif isinstance(knnMod, KNNRegressionModel): - devSum = 0.0 - for i in range(nTest): - testEx = testExamples[i] - trueRes = testEx[-1] - res = knnMod.PredictExample(testEx, appendExamples) - devSum += abs(trueRes - res) - return devSum / nTest, None - raise ValueError("Unrecognized Model Type") - - -def CrossValidationDriver(examples, attrs, nPossibleValues, numNeigh, - modelBuilder=makeClassificationModel, - distFunc=DistFunctions.EuclideanDist, holdOutFrac=0.3, silent=0, - calcTotalError=0, **kwargs): - """ Driver function for building a KNN model of a specified type - - **Arguments** - - - examples: the full set of examples - - - numNeigh: number of neighbors for the KNN model (basically k in k-NN) - - - knnModel: the type of KNN model (a classification vs regression model) - - - holdOutFrac: the fraction of the data which should be reserved for the hold-out set - (used to calculate error) - - - silent: a toggle used to control how much visual noise this makes as it goes - - - calcTotalError: a toggle used to indicate whether the classification error - of the tree should be calculated using the entire data set (when true) or just - the training hold out set (when false) - """ - - nTot = len(examples) - if not kwargs.get('replacementSelection', 0): - testIndices, trainIndices = SplitData.SplitIndices(nTot, holdOutFrac, silent=1, legacy=1, - replacement=0) - else: - testIndices, trainIndices = SplitData.SplitIndices(nTot, holdOutFrac, silent=1, legacy=0, - replacement=1) - trainExamples = [examples[x] for x in trainIndices] - testExamples = [examples[x] for x in testIndices] - - nTrain = len(trainExamples) - - if not silent: - print("Training with %d examples" % (nTrain)) - - knnMod = modelBuilder(numNeigh, attrs, distFunc) - - knnMod.SetTrainingExamples(trainExamples) - knnMod.SetTestExamples(testExamples) - - if not calcTotalError: - xValError, _ = CrossValidate(knnMod, testExamples, appendExamples=1) - else: - xValError, _ = CrossValidate(knnMod, examples, appendExamples=0) - - if not silent: - print('Validation error was %%%4.2f' % (100 * xValError)) - - knnMod._trainIndices = trainIndices - return knnMod, xValError diff --git a/rdkit/ML/KNN/DistFunctions.py b/rdkit/ML/KNN/DistFunctions.py deleted file mode 100755 index 0763b28f8fd..00000000000 --- a/rdkit/ML/KNN/DistFunctions.py +++ /dev/null @@ -1,76 +0,0 @@ -# $Id$ -# -# Copyright (C) 2003 Rational Discovery LLC -# All Rights Reserved -# - -import math - - -def EuclideanDist(ex1, ex2, attrs): - """ - >>> v1 = [0,1,0,1] - >>> v2 = [1,0,1,0] - >>> EuclideanDist(v1,v2,range(4)) - 2.0 - >>> EuclideanDist(v1,v1,range(4)) - 0.0 - >>> v2 = [0,0,0,1] - >>> EuclideanDist(v1,v2,range(4)) - 1.0 - >>> v2 = [0,.5,0,.5] - >>> abs(EuclideanDist(v1,v2,range(4))-1./math.sqrt(2))<1e-4 - 1 - - """ - dist = 0.0 - for i in attrs: - dist += (ex1[i] - ex2[i])**2 - dist = math.sqrt(dist) - return dist - - -def TanimotoDist(ex1, ex2, attrs): - """ - >>> v1 = [0,1,0,1] - >>> v2 = [1,0,1,0] - >>> TanimotoDist(v1,v2,range(4)) - 1.0 - >>> v2 = [1,0,1,1] - >>> TanimotoDist(v1,v2,range(4)) - 0.75 - >>> TanimotoDist(v2,v2,range(4)) - 0.0 - - # this tests Issue 122 - >>> v3 = [0,0,0,0] - >>> TanimotoDist(v3,v3,range(4)) - 1.0 - - """ - inter = 0.0 - unin = 0.0 - for i in attrs: - if (ex1[i] or ex2[i]): - unin += 1 - if (ex1[i] and ex2[i]): - inter += 1 - if (unin != 0.0): - return (1 - inter / unin) - else: - return 1.0 - - -# ------------------------------------ -# -# doctest boilerplate -# -def _runDoctests(verbose=None): # pragma: nocover - import doctest - import sys - failed, _ = doctest.testmod(optionflags=doctest.ELLIPSIS, verbose=verbose) - sys.exit(failed) - - -if __name__ == '__main__': # pragma: nocover - _runDoctests() diff --git a/rdkit/ML/KNN/KNNClassificationModel.py b/rdkit/ML/KNN/KNNClassificationModel.py deleted file mode 100644 index 4c48d3b9c73..00000000000 --- a/rdkit/ML/KNN/KNNClassificationModel.py +++ /dev/null @@ -1,80 +0,0 @@ -# $Id$ -# -# Copyright (C) 2003 Rational Discovery LLC -# All Rights Reserved -# -""" Define the class _KNNClassificationModel_, used to represent a k-nearest neighbhors - classification model - - Inherits from _KNNModel_ -""" - -from rdkit.ML.KNN import KNNModel - - -class KNNClassificationModel(KNNModel.KNNModel): - """ This is used to represent a k-nearest neighbor classifier - - """ - - def __init__(self, k, attrs, dfunc, radius=None): - self._setup(k, attrs, dfunc, radius) - - self._badExamples = [] # list of examples incorrectly classified - - def type(self): - return "Classification Model" - - def SetBadExamples(self, examples): - self._badExamples = examples - - def GetBadExamples(self): - return self._badExamples - - def NameModel(self, varNames): - self.SetName(self.type()) - - def ClassifyExample(self, example, appendExamples=0, neighborList=None): - """ Classify a an example by looking at its closest neighbors - - The class assigned to this example is same as the class for the mojority of its - _k neighbors - - **Arguments** - - - examples: the example to be classified - - - appendExamples: if this is nonzero then the example will be stored on this model - - - neighborList: if provided, will be used to return the list of neighbors - - **Returns** - - - the classification of _example_ - """ - if appendExamples: - self._examples.append(example) - - # first find the k-closest examples in the traning set - knnLst = self.GetNeighbors(example) - - # find out how many of the neighbors belong to each of the classes - clsCnt = {} - for knn in knnLst: - cls = knn[1][-1] - if (cls in clsCnt): - clsCnt[cls] += 1 - else: - clsCnt[cls] = 1 - if neighborList is not None: - neighborList.extend(knnLst) - - # now return the class with the maximum count - mkey = -1 - mcnt = -1 - for key in clsCnt.keys(): - if (mcnt < clsCnt[key]): - mkey = key - mcnt = clsCnt[key] - - return mkey diff --git a/rdkit/ML/KNN/KNNModel.py b/rdkit/ML/KNN/KNNModel.py deleted file mode 100644 index 0200abe951f..00000000000 --- a/rdkit/ML/KNN/KNNModel.py +++ /dev/null @@ -1,79 +0,0 @@ -# $Id$ -# -# Copyright (C) 2003 Rational Discovery LLC -# All Rights Reserved -# -""" Define the class _KNNModel_, used to represent a k-nearest neighbhors model - -""" -from rdkit.DataStructs.TopNContainer import TopNContainer - - -class KNNModel(object): - """ This is a base class used by KNNClassificationModel - and KNNRegressionModel to represent a k-nearest neighbor predictor. In general - one of this child classes needs to be instantiated. - - _KNNModel_s can save the following pieces of internal state, accessible via - standard setter/getter functions - the child object store additional stuff: - - 1) _Examples_: a list of examples which have been predicted (either classified - or values predicted) - - 2) _TrainingExamples_: List of training examples (since this is a KNN model these examples - along with the value _k_ below define the model) - - 3) _TestExamples_: the list of examples used to test the model - - 4) _k_: the number of closest neighbors used for prediction - - """ - - def __init__(self, k, attrs, dfunc, radius=None): - self._setup(k, attrs, dfunc, radius) - - def _setup(self, k, attrs, dfunc, radius): - self._examples = [] - self._trainingExamples = [] - self._testExamples = [] - self._k = k - self._attrs = attrs - self._dfunc = dfunc - self._name = "" - self._radius = radius - - def GetName(self): - return self._name - - def SetName(self, name): - self._name = name - - def GetExamples(self): - return self._examples - - def SetExamples(self, examples): - self._examples = examples - - def GetTrainingExamples(self): - return self._trainingExamples - - def SetTrainingExamples(self, examples): - self._trainingExamples = examples - - def GetTestExamples(self): - return self._testExamples - - def SetTestExamples(self, examples): - self._testExamples = examples - - def GetNeighbors(self, example): - """ Returns the k nearest neighbors of the example - - """ - nbrs = TopNContainer(self._k) - for trex in self._trainingExamples: - dist = self._dfunc(trex, example, self._attrs) - if self._radius is None or dist < self._radius: - nbrs.Insert(-dist, trex) - nbrs.reverse() - return [x for x in nbrs] diff --git a/rdkit/ML/KNN/KNNRegressionModel.py b/rdkit/ML/KNN/KNNRegressionModel.py deleted file mode 100644 index 6261a26e6ad..00000000000 --- a/rdkit/ML/KNN/KNNRegressionModel.py +++ /dev/null @@ -1,81 +0,0 @@ -# $Id$ -# -# Copyright (C) 2003 Rational Discovery LLC -# All Rights Reserved -# -""" Define the class _KNNRegressionModel_, used to represent a k-nearest neighbhors -regression model - - Inherits from _KNNModel_ -""" - -from rdkit.ML.KNN import KNNModel - - -class KNNRegressionModel(KNNModel.KNNModel): - """ This is used to represent a k-nearest neighbor classifier - - """ - - def __init__(self, k, attrs, dfunc, radius=None): - self._setup(k, attrs, dfunc, radius) - - self._badExamples = [] # list of examples incorrectly classified - - def type(self): - return "Regression Model" - - def SetBadExamples(self, examples): - self._badExamples = examples - - def GetBadExamples(self): - return self._badExamples - - def NameModel(self, varNames): - self.SetName(self.type()) - - def PredictExample(self, example, appendExamples=0, weightedAverage=0, neighborList=None): - """ Generates a prediction for an example by looking at its closest neighbors - - **Arguments** - - - examples: the example to be classified - - - appendExamples: if this is nonzero then the example will be stored on this model - - - weightedAverage: if provided, the neighbors' contributions to the value will be - weighed by their reciprocal square distance - - - neighborList: if provided, will be used to return the list of neighbors - - **Returns** - - - the classification of _example_ - - """ - if appendExamples: - self._examples.append(example) - - # first find the k-closest examples in the training set - knnLst = self.GetNeighbors(example) - - accum = 0.0 - denom = 0.0 - for knn in knnLst: - if knn[1] is None: - continue - if weightedAverage: - dist = knn[0] - if dist == 0.0: - w = 1. - else: - w = 1. / dist - else: - w = 1.0 - accum += w * knn[1][-1] - denom += w - if denom: - accum /= denom - if neighborList is not None: - neighborList.extend(knnLst) - return accum diff --git a/rdkit/ML/KNN/UnitTestKNN.py b/rdkit/ML/KNN/UnitTestKNN.py deleted file mode 100755 index 3e608bc2547..00000000000 --- a/rdkit/ML/KNN/UnitTestKNN.py +++ /dev/null @@ -1,125 +0,0 @@ -# -# Copyright (C) 2003 Rational Discovery LLC -# All Rights Reserved -""" unit testing code for knn models """ -import doctest -import os.path -import unittest - -from rdkit import RDConfig, RDRandom -from rdkit.ML.Data import DataUtils -from rdkit.ML.KNN import (CrossValidate, DistFunctions, KNNModel, - KNNRegressionModel) - - -def feq(a, b, tol=1e-4): - return abs(a - b) < tol - - -def load_tests(loader, tests, ignore): - """ Add the Doctests from the module """ - tests.addTests(doctest.DocTestSuite(DistFunctions, optionflags=doctest.ELLIPSIS)) - return tests - - -class TestCase(unittest.TestCase): - - def setUp(self): - RDRandom.seed(25) - - def test1Neighbors(self): - fName = os.path.join(RDConfig.RDCodeDir, 'ML', 'KNN', 'test_data', 'random_pts.csv') - data = DataUtils.TextFileToData(fName) - examples = data.GetNamedData() - npvals = data.GetNPossibleVals() - nvars = data.GetNVars() - attrs = list(range(1, nvars + 1)) - numNeigh = 11 - metric = DistFunctions.EuclideanDist - mdl = KNNModel.KNNModel(numNeigh, attrs, metric) - pt = examples.pop(0) - tgt = [(metric(pt, ex, attrs), ex) for ex in examples] - tgt.sort() - mdl.SetTrainingExamples(examples) - neighbors = mdl.GetNeighbors(pt) - for i in range(numNeigh): - assert feq(-tgt[i][0], neighbors[i][0]) - assert tgt[i][1][0] == neighbors[i][1][0] - - def test2XValClass(self): - fName = os.path.join(RDConfig.RDCodeDir, 'ML', 'KNN', 'test_data', 'random_pts.csv') - data = DataUtils.TextFileToData(fName) - examples = data.GetNamedData() - npvals = data.GetNPossibleVals() - nvars = data.GetNVars() - attrs = list(range(1, nvars + 1)) - numNeigh = 11 - mod, err = CrossValidate.CrossValidationDriver(examples, attrs, npvals, numNeigh, silent=1) - self.assertAlmostEqual(err, 0.01075, 4) - - neighborList = [] - res = mod.ClassifyExample(examples[0], neighborList=neighborList) - self.assertEqual(res, 1) - self.assertEqual(neighborList[0][1], examples[0]) - - self.assertEqual(mod.GetName(), '') - mod.SetName('name') - self.assertEqual(mod.GetName(), 'name') - self.assertEqual(mod.type(), 'Classification Model') - mod.NameModel('this argument is ignored') - self.assertEqual(mod.GetName(), 'Classification Model') - - def test3Regress(self): - # """ a carefully laid out regression data set where the results are clear: """ - fName = os.path.join(RDConfig.RDCodeDir, 'ML', 'KNN', 'test_data', 'sample_pts.csv') - data = DataUtils.TextFileToData(fName) - examples = data.GetNamedData() - nvars = data.GetNVars() - attrs = list(range(1, nvars + 1)) - numNeigh = 4 - metric = DistFunctions.EuclideanDist - mdl = KNNRegressionModel.KNNRegressionModel(numNeigh, attrs, metric) - mdl.SetTrainingExamples(examples) - - res = mdl.PredictExample([4, -3.5, 2.5, 0]) - assert feq(res, 1.25) - res = mdl.PredictExample([4, 3, 2, 0]) - assert feq(res, 1.5) - res = mdl.PredictExample([4, 3, -2.5, 0]) - assert feq(res, -1.5) - # Use a distance dependent weight for the neighbours - res = mdl.PredictExample([4, 3, -2.5, 0], weightedAverage=True) - self.assertAlmostEqual(res, -1.6) - # Check the case that the example is identical to one of the neighbours (distance = 0) - neighborList = [] - res = mdl.PredictExample(examples[0], weightedAverage=True, neighborList=neighborList) - self.assertAlmostEqual(res, 1.5857864) - self.assertEqual(neighborList[0][1], examples[0]) - - self.assertEqual(mdl.GetBadExamples(), []) - - self.assertEqual(mdl.GetName(), '') - mdl.SetName('name') - self.assertEqual(mdl.GetName(), 'name') - self.assertEqual(mdl.type(), 'Regression Model') - mdl.NameModel('this argument is ignored') - self.assertEqual(mdl.GetName(), 'Regression Model') - - self.assertEqual(sorted(mdl.GetTrainingExamples() + mdl.GetTestExamples()), sorted(examples)) - - def test4XValRegress(self): - fName = os.path.join(RDConfig.RDCodeDir, 'ML', 'KNN', 'test_data', 'random_pts.csv') - data = DataUtils.TextFileToData(fName) - examples = data.GetNamedData() - npvals = data.GetNPossibleVals() - nvars = data.GetNVars() - attrs = list(range(1, nvars + 1)) - numNeigh = 11 - _, err = CrossValidate.CrossValidationDriver(examples, attrs, npvals, numNeigh, silent=1, - modelBuilder=CrossValidate.makeRegressionModel) - # NOTE: this number hasn't been extensively checked - self.assertAlmostEqual(err, 0.0777, 4) - - -if __name__ == '__main__': # pragma: nocover - unittest.main() diff --git a/rdkit/ML/KNN/__init__.py b/rdkit/ML/KNN/__init__.py deleted file mode 100644 index c90b0c43381..00000000000 --- a/rdkit/ML/KNN/__init__.py +++ /dev/null @@ -1,10 +0,0 @@ -# copyright 2000, greg landrum -""" - -Here is the implementation for K-nearest neighbors - -""" -from warnings import warn - -warn('This module is deprecated and will be removed in the 2024.03 release', DeprecationWarning, - stacklevel=2) diff --git a/rdkit/ML/KNN/test_data/random_pts.csv b/rdkit/ML/KNN/test_data/random_pts.csv deleted file mode 100644 index 31e8229fed4..00000000000 --- a/rdkit/ML/KNN/test_data/random_pts.csv +++ /dev/null @@ -1,600 +0,0 @@ -1,0.019,0.263,0.750,0.768,0.283,0.490,1 -2,0.248,0.439,0.892,0.157,0.264,0.838,1 -3,0.287,0.294,0.331,0.559,0.250,0.685,1 -4,0.173,0.201,0.890,0.820,0.278,0.007,1 -5,0.198,0.883,0.903,0.654,0.196,0.365,1 -6,0.411,0.252,0.799,0.295,0.391,0.930,1 -7,0.761,0.840,0.045,0.619,0.831,0.860,1 -8,0.106,0.077,0.430,0.992,0.947,0.486,1 -9,0.066,0.589,0.837,0.223,0.139,0.035,1 -10,0.574,0.607,0.998,0.840,0.558,0.688,1 -11,0.437,0.597,0.355,0.663,0.477,0.939,1 -12,0.085,0.033,0.642,0.050,0.778,0.743,1 -13,0.564,0.608,0.968,0.222,0.745,0.568,1 -14,0.530,0.750,0.252,0.345,0.515,0.811,1 -15,0.066,0.858,0.724,0.831,0.631,0.322,1 -16,0.169,0.189,0.827,0.553,0.566,0.515,1 -17,0.988,0.413,0.767,0.898,0.860,0.233,1 -18,0.823,0.659,0.790,0.745,0.640,0.515,1 -19,0.151,0.672,0.798,0.011,0.259,0.098,1 -20,0.360,0.182,0.037,0.530,0.093,0.416,1 -21,0.605,0.686,0.253,0.062,0.036,0.290,1 -22,0.802,0.666,0.633,0.748,0.694,0.221,1 -23,0.466,0.127,0.260,0.485,0.493,0.738,1 -24,0.221,0.068,0.573,0.975,0.250,0.357,1 -25,0.758,0.484,0.999,0.645,0.015,0.812,1 -26,0.738,0.090,0.475,0.496,0.743,0.266,1 -27,0.553,0.967,0.373,0.990,0.794,0.762,1 -28,0.954,0.235,0.612,0.433,0.478,0.180,1 -29,0.223,0.480,0.819,0.160,0.283,0.089,1 -30,0.882,0.684,0.352,0.974,0.366,0.713,1 -31,0.205,0.260,0.844,0.982,0.906,0.350,1 -32,0.228,0.571,0.733,0.852,0.429,0.041,1 -33,0.896,0.102,0.564,0.165,0.494,0.505,1 -34,0.375,0.826,0.071,0.543,0.341,0.073,1 -35,0.257,0.971,0.690,0.521,0.043,0.398,1 -36,0.905,0.731,0.497,0.368,0.683,0.329,1 -37,0.333,0.247,0.374,0.672,0.050,0.671,1 -38,0.715,0.059,0.771,0.202,0.693,0.327,1 -39,0.375,0.918,0.901,0.673,0.473,0.505,1 -40,0.862,0.819,0.679,0.000,0.688,0.416,1 -41,0.698,0.633,0.429,0.156,0.438,0.504,1 -42,0.511,0.247,0.445,0.828,0.499,0.516,1 -43,0.143,0.310,0.530,0.167,0.983,0.158,1 -44,0.263,0.522,0.507,0.012,0.518,0.186,1 -45,0.141,0.270,0.316,0.280,0.362,0.423,1 -46,0.944,0.529,0.953,0.969,0.768,0.719,1 -47,0.444,0.415,0.481,0.809,0.075,0.276,1 -48,0.038,0.821,0.072,0.362,0.571,0.448,1 -49,0.857,0.029,0.464,0.926,0.554,0.487,1 -50,0.715,0.614,0.310,0.889,0.814,0.920,1 -51,0.498,0.113,0.222,0.281,0.357,0.978,1 -52,0.063,0.127,0.968,0.193,0.755,0.341,1 -53,0.629,0.272,0.591,0.899,0.690,0.236,1 -54,0.029,0.677,0.989,0.904,0.568,0.946,1 -55,0.790,0.716,0.069,0.869,0.577,0.878,1 -56,0.486,0.669,0.570,0.696,0.545,0.082,1 -57,0.654,0.613,0.015,0.549,0.342,0.078,1 -58,0.119,0.499,0.438,0.943,0.635,0.388,1 -59,0.758,0.545,0.994,0.779,0.353,0.036,1 -60,0.141,0.126,0.181,0.901,0.827,0.290,1 -61,0.605,0.559,0.731,0.617,0.865,0.156,1 -62,0.291,0.066,0.265,0.683,0.626,0.946,1 -63,0.159,0.479,0.220,0.348,0.083,0.243,1 -64,0.059,0.469,0.829,0.593,0.049,0.827,1 -65,0.899,0.603,0.325,0.911,0.622,0.323,1 -66,0.586,0.585,0.850,0.308,0.218,0.703,1 -67,0.123,0.255,0.948,0.645,0.863,0.032,1 -68,0.848,0.078,0.031,0.801,0.445,0.170,1 -69,0.992,0.441,0.111,0.594,0.939,0.244,1 -70,0.203,0.893,0.409,0.199,0.803,0.295,1 -71,0.849,0.024,0.450,0.075,0.248,0.079,1 -72,0.327,0.509,0.247,0.009,0.604,0.001,1 -73,0.671,0.597,0.930,0.157,0.216,0.398,1 -74,0.154,0.129,0.156,0.134,0.822,0.934,1 -75,0.229,0.994,0.082,0.387,0.542,0.913,1 -76,0.079,0.568,0.075,0.698,0.675,0.524,1 -77,0.401,0.271,0.723,0.790,0.264,0.130,1 -78,0.761,0.152,0.195,0.996,0.977,0.818,1 -79,0.676,0.885,0.448,0.217,0.095,0.127,1 -80,0.546,0.147,0.107,0.777,0.047,0.761,1 -81,0.662,0.341,0.035,0.069,0.164,0.563,1 -82,0.380,0.159,0.307,0.009,0.533,0.165,1 -83,0.778,0.412,0.814,0.678,0.944,0.972,1 -84,0.181,0.858,0.303,0.997,0.468,0.891,1 -85,0.575,0.064,0.420,0.438,0.751,0.790,1 -86,0.064,0.251,0.537,0.667,0.473,0.926,1 -87,0.985,0.880,0.201,0.996,0.287,0.265,1 -88,0.521,0.344,0.976,0.013,0.213,0.682,1 -89,0.874,0.687,0.569,0.907,0.874,0.625,1 -90,0.827,0.405,0.767,0.275,0.621,0.010,1 -91,0.442,0.591,0.859,0.808,0.228,0.205,1 -92,0.627,0.603,0.993,0.036,0.300,0.219,1 -93,0.454,0.122,0.788,0.928,0.726,0.761,1 -94,0.598,0.571,0.418,0.187,0.057,0.213,1 -95,0.887,0.573,0.063,0.793,0.867,0.611,1 -96,0.654,0.622,0.321,0.451,0.684,0.931,1 -97,0.895,0.535,0.819,0.419,0.726,0.005,1 -98,0.576,0.545,0.235,0.942,0.102,0.279,1 -99,0.226,0.528,0.797,0.974,0.303,0.976,1 -100,0.651,0.101,0.875,0.588,0.477,0.543,1 -101,0.247,0.321,0.930,0.304,0.312,0.318,1 -102,0.013,0.008,0.131,0.649,0.160,0.967,1 -103,0.151,0.995,0.387,0.864,0.887,0.896,1 -104,0.690,0.483,0.336,0.280,0.306,0.282,1 -105,0.417,0.960,0.303,0.319,0.849,0.244,1 -106,0.924,0.796,0.989,0.511,0.497,0.290,1 -107,0.973,0.168,0.303,0.353,0.417,0.005,1 -108,0.697,0.806,0.288,0.930,0.705,0.424,1 -109,0.365,0.255,0.963,0.956,0.092,0.997,1 -110,0.672,0.605,0.420,0.383,0.784,0.540,1 -111,0.282,0.305,0.265,0.235,0.245,0.800,1 -112,0.612,0.962,0.672,0.881,0.251,0.867,1 -113,0.824,0.144,0.348,0.282,0.704,0.283,1 -114,0.363,0.190,0.773,0.753,0.441,0.490,1 -115,0.808,0.108,0.769,0.556,0.110,0.398,1 -116,0.653,0.469,0.805,0.523,0.511,0.885,1 -117,0.612,0.428,0.383,0.955,0.120,0.875,1 -118,0.704,0.287,0.552,0.301,0.163,0.420,1 -119,0.725,0.691,0.481,0.284,0.585,0.008,1 -120,0.884,0.792,0.953,0.488,0.871,0.150,1 -121,0.630,0.308,0.533,0.610,0.263,0.984,1 -122,0.804,0.389,0.622,0.631,0.427,0.862,1 -123,0.027,0.871,0.975,0.940,0.550,0.160,1 -124,0.237,0.448,0.843,0.387,0.158,0.176,1 -125,0.908,0.474,0.088,0.670,0.642,0.164,1 -126,0.635,0.672,0.200,0.920,0.710,0.318,1 -127,0.097,0.774,0.420,0.801,0.313,0.260,1 -128,0.234,0.203,0.641,0.449,0.891,0.133,1 -129,0.622,0.707,0.070,0.212,0.752,0.023,1 -130,0.134,0.099,0.492,0.918,0.445,0.574,1 -131,0.794,0.197,0.675,0.220,0.843,0.155,1 -132,0.005,0.452,0.258,0.647,0.180,0.687,1 -133,0.153,0.109,0.620,0.179,0.182,0.710,1 -134,0.766,0.929,0.003,0.258,0.656,0.822,1 -135,0.268,0.747,0.158,0.153,0.975,0.470,1 -136,0.902,0.448,0.326,0.840,0.435,0.528,1 -137,0.725,0.168,0.752,0.187,0.541,0.585,1 -138,0.236,0.442,0.820,0.328,0.434,0.562,1 -139,0.916,0.340,0.596,0.618,0.109,0.511,1 -140,0.771,0.770,0.024,0.563,0.130,0.610,1 -141,0.530,0.014,0.449,0.524,0.893,0.645,1 -142,0.404,0.197,0.109,0.186,0.329,0.956,1 -143,0.378,0.399,0.748,0.802,0.574,0.030,1 -144,0.136,0.339,0.160,0.133,0.485,0.637,1 -145,0.392,0.181,0.003,0.558,0.309,0.386,1 -146,0.115,0.165,0.238,0.291,0.703,0.085,1 -147,0.170,0.101,0.217,0.035,0.384,0.583,1 -148,0.498,0.920,0.134,0.246,0.253,0.948,1 -149,0.399,0.605,0.917,0.476,0.547,0.385,1 -150,0.611,0.633,0.345,0.211,0.384,0.617,1 -151,0.824,0.602,0.207,0.444,0.423,0.055,1 -152,0.308,0.745,0.588,0.987,0.041,0.010,1 -153,0.921,0.465,0.203,0.109,0.121,0.482,1 -154,0.526,0.822,0.883,0.745,0.252,0.420,1 -155,0.945,0.161,0.150,0.892,0.958,0.195,1 -156,0.204,0.635,0.117,0.524,0.400,0.854,1 -157,0.097,0.396,0.595,0.813,0.962,0.559,1 -158,0.940,0.297,0.425,0.319,0.753,0.765,1 -159,0.476,0.874,0.106,0.952,0.974,0.517,1 -160,0.058,0.077,0.048,0.126,0.458,0.193,1 -161,0.932,0.144,0.766,0.958,0.993,0.071,1 -162,0.290,0.167,0.836,0.498,0.285,0.326,1 -163,0.192,0.239,0.439,0.383,0.935,0.063,1 -164,0.436,0.619,0.774,0.248,0.519,0.508,1 -165,0.638,0.887,0.558,0.042,0.656,0.253,1 -166,0.653,0.299,0.698,0.944,0.455,0.688,1 -167,0.679,0.199,0.920,0.830,0.020,0.208,1 -168,0.057,0.702,0.567,0.209,0.176,0.962,1 -169,0.103,0.745,0.042,0.479,0.466,0.175,1 -170,0.129,0.687,0.713,0.362,0.987,0.676,1 -171,0.271,0.447,0.327,0.508,0.568,0.449,1 -172,0.520,0.275,0.563,0.512,0.499,0.825,1 -173,0.984,0.173,0.981,0.405,0.588,0.986,1 -174,0.025,0.748,0.202,0.736,0.165,0.363,1 -175,0.589,0.902,0.926,0.437,0.308,0.095,1 -176,0.679,0.530,0.351,0.483,0.825,0.841,1 -177,0.490,0.386,0.650,0.645,0.262,0.137,1 -178,0.163,0.377,0.093,0.744,0.645,0.774,1 -179,0.871,0.915,0.564,0.088,0.600,0.701,1 -180,0.888,0.717,0.470,0.119,0.187,0.715,1 -181,0.024,0.111,0.450,0.282,0.988,0.670,1 -182,0.848,0.784,0.548,0.140,0.010,0.263,1 -183,0.230,0.725,0.320,0.228,0.340,0.566,1 -184,0.864,0.405,0.328,0.891,0.359,0.870,1 -185,0.476,0.087,0.289,0.370,0.812,0.891,1 -186,0.541,0.486,0.912,0.337,0.622,0.677,1 -187,0.733,0.385,0.009,0.576,0.971,0.110,1 -188,0.453,0.970,0.345,0.343,0.471,0.421,1 -189,0.905,0.773,0.201,0.975,0.953,0.643,1 -190,0.163,0.798,0.057,0.797,0.220,0.022,1 -191,0.497,0.115,0.794,0.863,0.714,0.778,1 -192,0.207,0.754,0.397,0.762,0.890,0.986,1 -193,0.157,0.674,0.078,0.678,0.688,0.461,1 -194,0.291,0.312,0.463,0.470,0.460,0.783,1 -195,0.184,0.387,0.496,0.810,0.783,0.507,1 -196,0.313,0.405,0.010,0.959,0.830,0.808,1 -197,0.540,0.175,0.741,0.036,0.100,0.814,1 -198,0.767,0.523,0.343,0.052,0.216,0.967,1 -199,0.915,0.110,0.582,0.268,0.543,0.449,1 -200,0.292,0.296,0.897,0.038,0.472,0.647,1 -201,0.357,0.643,0.029,0.277,0.522,0.378,1 -202,0.858,0.623,0.834,0.644,0.516,0.322,1 -203,0.349,0.108,0.077,0.284,0.339,0.990,1 -204,0.934,0.072,0.424,0.586,0.760,0.149,1 -205,0.126,0.910,0.976,0.355,0.252,0.809,1 -206,0.032,0.108,0.343,0.133,0.275,0.303,1 -207,0.669,0.252,0.023,0.355,0.992,0.844,1 -208,0.919,0.211,0.550,0.325,0.175,0.387,1 -209,0.837,0.852,0.660,0.305,0.917,0.897,1 -210,0.936,0.169,0.091,0.936,0.816,0.258,1 -211,0.722,0.042,0.884,0.155,0.394,0.137,1 -212,0.209,0.758,0.515,0.504,0.612,0.244,1 -213,0.049,0.628,0.259,0.165,0.379,0.138,1 -214,0.104,0.888,0.012,0.444,0.007,0.934,1 -215,0.771,0.177,0.343,0.109,0.787,0.123,1 -216,0.458,0.616,0.569,0.258,0.067,0.314,1 -217,0.054,0.354,0.624,0.357,0.355,0.158,1 -218,0.035,0.672,0.964,0.291,0.116,0.554,1 -219,0.026,0.906,0.036,0.988,0.918,0.287,1 -220,0.902,0.812,0.108,0.378,0.578,0.987,1 -221,0.384,0.175,0.202,0.522,0.839,0.016,1 -222,0.877,0.122,0.281,0.330,0.074,0.500,1 -223,0.008,0.643,0.864,0.267,0.884,0.828,1 -224,0.816,0.056,0.280,0.687,0.337,0.661,1 -225,0.110,0.828,0.272,0.327,0.351,0.983,1 -226,0.666,0.467,0.869,0.692,0.274,0.196,1 -227,0.686,0.885,0.504,0.877,0.615,0.458,1 -228,0.464,0.392,0.176,0.547,0.174,0.232,1 -229,0.182,0.117,0.994,0.870,0.847,0.011,1 -230,0.912,0.030,0.187,0.635,0.620,0.383,1 -231,0.962,0.160,0.900,0.537,0.961,0.204,1 -232,0.638,0.603,0.345,0.876,0.031,0.337,1 -233,0.770,0.895,0.073,0.664,0.647,0.467,1 -234,0.919,0.891,0.403,0.253,0.919,0.492,1 -235,0.333,0.997,0.610,0.537,0.115,0.995,1 -236,0.089,0.737,0.172,0.949,0.897,0.608,1 -237,0.761,0.760,0.539,0.408,0.696,0.254,1 -238,0.227,0.293,0.414,0.777,0.124,0.359,1 -239,0.006,0.066,0.834,0.250,0.230,0.564,1 -240,0.366,0.180,0.665,0.648,0.050,0.369,1 -241,0.980,0.193,0.201,0.776,0.641,0.837,1 -242,0.560,0.718,0.610,0.406,0.962,0.685,1 -243,0.218,0.671,0.299,0.632,0.300,0.074,1 -244,0.214,0.388,0.318,0.708,0.012,0.530,1 -245,0.931,0.770,0.846,0.003,0.189,0.702,1 -246,0.471,0.627,0.013,0.178,0.432,0.824,1 -247,0.610,0.925,0.137,0.413,0.204,0.929,1 -248,0.063,0.448,0.168,0.437,0.776,0.694,1 -249,0.477,0.808,0.469,0.478,0.344,0.388,1 -250,0.362,0.515,0.819,0.634,0.621,0.668,1 -251,0.410,0.379,0.852,0.998,0.141,0.071,1 -252,0.536,0.216,0.976,0.766,0.310,0.100,1 -253,0.878,0.241,0.493,0.549,0.843,0.876,1 -254,0.745,0.315,0.484,0.086,0.408,0.870,1 -255,0.254,0.346,0.915,0.960,0.346,0.959,1 -256,0.525,0.944,0.035,0.888,0.162,0.426,1 -257,0.776,0.752,0.862,0.911,0.573,0.601,1 -258,0.361,0.196,0.064,0.329,0.541,0.521,1 -259,0.741,0.973,0.520,0.065,0.872,0.749,1 -260,0.884,0.736,0.687,0.512,0.525,0.383,1 -261,0.174,0.039,0.920,0.871,0.084,0.955,1 -262,0.754,0.373,0.269,0.392,0.751,0.938,1 -263,0.494,0.472,0.705,0.895,0.092,0.943,1 -264,0.991,0.616,0.300,0.124,0.735,0.930,1 -265,0.369,0.925,0.776,0.496,0.821,0.373,1 -266,0.271,0.514,0.362,0.534,0.206,0.214,1 -267,0.453,0.595,0.859,0.103,0.498,0.869,1 -268,0.782,0.252,0.059,0.320,0.374,0.956,1 -269,0.819,0.644,0.923,0.257,0.878,0.944,1 -270,0.595,0.414,0.155,0.111,0.379,0.930,1 -271,0.253,0.618,0.351,0.004,0.023,0.728,1 -272,0.556,0.451,0.933,0.437,0.954,0.160,1 -273,0.375,0.352,0.568,0.558,0.709,0.717,1 -274,0.473,0.439,0.949,0.234,0.334,0.758,1 -275,0.824,0.754,0.622,0.076,0.025,0.217,1 -276,0.986,0.730,0.587,0.080,0.478,0.529,1 -277,0.362,0.957,0.897,0.375,0.338,0.583,1 -278,0.104,0.340,0.246,0.736,0.393,0.063,1 -279,0.533,0.320,0.445,0.339,0.731,0.336,1 -280,0.515,0.950,0.321,0.841,0.361,0.493,1 -281,0.920,0.566,0.677,0.260,0.961,0.722,1 -282,0.556,0.668,0.266,0.020,0.500,0.372,1 -283,0.362,0.356,0.647,0.311,0.852,0.806,1 -284,0.586,0.621,0.730,0.462,0.387,0.091,1 -285,0.620,0.597,0.526,0.505,0.801,0.717,1 -286,0.284,0.002,0.664,0.405,0.692,0.969,1 -287,0.360,0.371,0.149,0.354,0.734,0.719,1 -288,0.459,0.311,0.583,0.971,0.969,0.647,1 -289,0.800,0.471,0.589,0.789,0.284,0.684,1 -290,0.596,0.251,0.186,0.134,0.967,0.001,1 -291,0.294,0.046,0.511,0.675,0.751,0.274,1 -292,0.613,0.616,0.286,0.923,0.180,0.552,1 -293,0.732,0.199,0.857,0.821,0.905,0.969,1 -294,0.136,0.215,0.022,0.568,0.986,0.972,1 -295,0.577,0.139,0.788,0.345,0.248,0.496,1 -296,0.535,0.086,0.430,0.609,0.357,0.192,1 -297,0.879,0.564,0.248,0.454,0.316,0.688,1 -298,0.235,0.285,0.101,0.517,0.419,0.808,1 -299,0.245,0.240,0.114,0.217,0.437,0.113,1 -300,0.601,0.146,0.514,0.760,0.371,0.866,1 -301,1.786,0.527,0.856,0.809,0.016,0.589,0 -302,1.001,0.241,0.907,0.153,0.361,0.767,0 -303,1.134,0.123,0.042,0.094,0.518,0.555,0 -304,1.500,0.643,0.542,0.554,0.565,0.896,0 -305,1.314,0.268,0.725,0.455,0.508,0.411,0 -306,1.324,0.523,0.164,0.160,0.327,0.911,0 -307,1.157,0.127,0.058,0.808,0.462,0.388,0 -308,1.248,0.872,0.495,0.569,0.774,0.158,0 -309,1.627,0.878,0.569,0.804,0.073,0.247,0 -310,1.465,0.451,0.683,0.724,0.756,0.109,0 -311,1.247,0.679,0.279,0.351,0.659,0.117,0 -312,1.507,0.594,0.923,0.189,0.922,0.851,0 -313,1.229,0.446,0.450,0.519,0.104,1.000,0 -314,1.883,0.674,0.015,0.463,0.299,0.179,0 -315,1.410,0.906,0.172,0.394,0.860,0.367,0 -316,1.859,0.272,0.898,0.668,0.887,0.718,0 -317,1.350,0.589,0.594,0.395,0.580,0.669,0 -318,1.294,0.320,1.000,0.793,0.892,0.141,0 -319,1.186,0.609,0.385,0.274,0.045,0.341,0 -320,1.367,0.066,0.354,0.516,0.664,0.977,0 -321,1.483,0.028,0.763,0.342,0.138,0.850,0 -322,1.449,0.978,0.524,0.988,0.751,0.821,0 -323,1.999,0.048,0.034,0.578,0.858,0.093,0 -324,1.703,0.792,0.662,0.926,0.502,0.569,0 -325,1.229,0.703,0.485,0.034,0.412,0.111,0 -326,1.959,0.139,0.927,0.021,0.223,0.269,0 -327,1.890,0.372,0.061,0.186,0.878,0.958,0 -328,1.563,0.673,0.975,0.187,0.980,0.310,0 -329,1.718,0.478,0.765,0.546,0.083,0.338,0 -330,1.268,0.982,0.255,0.152,0.100,0.943,0 -331,1.443,0.994,0.514,0.074,0.866,0.752,0 -332,1.892,0.798,0.789,0.660,0.676,0.373,0 -333,1.150,0.729,0.980,0.286,0.386,0.041,0 -334,1.826,0.874,0.836,0.551,0.870,0.609,0 -335,1.735,0.730,0.274,0.503,0.234,0.819,0 -336,1.395,0.908,0.018,0.124,0.616,0.478,0 -337,1.103,0.560,0.353,0.027,0.211,0.028,0 -338,1.035,0.926,0.672,0.860,0.761,0.580,0 -339,1.456,0.151,0.832,0.574,0.832,0.193,0 -340,1.148,0.007,0.481,0.651,0.319,0.068,0 -341,1.902,0.916,0.088,0.042,0.635,0.568,0 -342,1.762,0.532,0.276,0.541,0.103,0.287,0 -343,1.758,0.741,0.687,0.985,0.549,0.520,0 -344,1.945,0.435,0.143,0.500,0.206,0.012,0 -345,1.399,0.434,0.855,0.805,0.753,0.890,0 -346,1.443,0.180,0.444,0.601,0.106,0.602,0 -347,1.425,0.059,0.717,0.768,0.170,0.636,0 -348,1.003,0.183,0.190,0.807,0.033,0.656,0 -349,1.668,0.445,0.950,0.167,0.866,0.643,0 -350,1.331,0.510,0.979,0.214,0.528,0.423,0 -351,1.138,0.918,0.529,0.577,0.125,0.477,0 -352,1.575,0.013,0.788,0.396,0.775,0.480,0 -353,1.721,0.753,0.004,0.995,0.654,0.567,0 -354,1.350,0.190,0.035,0.987,0.860,0.031,0 -355,1.412,0.419,0.517,0.306,0.233,0.138,0 -356,1.346,0.313,0.951,0.688,0.357,0.662,0 -357,1.361,0.134,0.706,0.692,0.623,0.858,0 -358,1.664,0.239,0.616,0.466,0.287,0.017,0 -359,1.486,0.680,0.044,0.054,0.196,0.545,0 -360,1.738,0.242,0.655,0.881,0.488,0.142,0 -361,1.673,0.053,0.239,0.864,0.740,0.050,0 -362,1.435,0.437,0.546,0.060,0.908,0.806,0 -363,1.072,0.323,0.254,0.662,0.917,0.005,0 -364,1.810,0.654,0.267,0.308,0.080,0.074,0 -365,1.477,0.474,0.756,0.817,0.061,0.120,0 -366,1.753,0.569,0.933,0.029,0.966,0.840,0 -367,1.175,0.988,0.430,0.225,0.970,0.950,0 -368,1.815,0.829,0.760,0.129,0.188,0.755,0 -369,1.351,0.539,0.709,0.594,0.192,0.460,0 -370,1.443,0.085,0.146,0.385,0.375,0.193,0 -371,1.059,0.918,0.933,0.756,0.828,0.788,0 -372,1.220,0.400,0.551,0.108,0.436,0.991,0 -373,1.106,0.976,0.413,0.534,0.723,0.211,0 -374,1.977,0.326,0.243,0.319,0.126,0.471,0 -375,1.772,0.703,0.092,0.886,0.770,0.781,0 -376,1.857,0.303,0.383,0.294,0.061,0.638,0 -377,1.844,0.093,0.061,0.546,0.484,0.076,0 -378,1.026,0.795,0.940,0.770,0.082,0.741,0 -379,1.598,0.198,0.051,0.664,0.045,0.543,0 -380,1.838,0.708,0.859,0.411,0.443,0.438,0 -381,1.175,0.723,0.577,0.244,0.263,0.153,0 -382,1.380,0.796,0.893,0.284,0.792,0.293,0 -383,1.651,0.624,0.741,0.908,0.812,0.353,0 -384,1.327,0.584,0.344,0.406,0.283,0.762,0 -385,1.686,0.676,0.776,0.431,0.341,0.143,0 -386,1.615,0.609,0.193,0.662,0.153,0.681,0 -387,1.597,0.643,0.670,0.616,0.352,0.645,0 -388,1.432,0.041,0.467,0.769,0.399,0.068,0 -389,1.944,0.347,0.913,0.876,0.573,0.820,0 -390,1.154,0.801,0.981,0.291,0.811,0.223,0 -391,1.157,0.233,0.869,0.709,0.446,0.923,0 -392,1.088,0.301,0.742,0.346,0.994,0.168,0 -393,1.035,0.820,0.383,0.613,0.370,0.807,0 -394,1.288,0.355,0.738,0.838,0.329,0.633,0 -395,1.255,0.492,0.553,0.247,0.646,0.972,0 -396,1.169,0.052,0.870,0.692,0.476,0.172,0 -397,1.113,0.311,0.106,0.620,0.800,0.319,0 -398,1.612,0.381,0.129,0.326,0.040,0.445,0 -399,1.532,0.505,0.955,0.487,0.458,0.303,0 -400,1.475,0.139,0.692,0.580,0.721,0.136,0 -401,1.632,0.129,0.828,0.344,0.911,0.109,0 -402,1.859,0.536,0.564,0.524,0.478,0.161,0 -403,1.674,0.541,0.088,0.428,0.301,0.434,0 -404,1.170,0.582,0.378,0.978,0.144,0.301,0 -405,1.954,0.887,0.757,0.304,0.224,0.385,0 -406,1.094,0.582,0.382,0.671,0.873,0.216,0 -407,1.592,0.664,0.452,0.526,0.779,0.851,0 -408,1.980,0.601,0.061,0.969,0.602,0.315,0 -409,1.533,0.740,0.723,0.844,0.400,0.198,0 -410,1.435,0.671,0.419,0.929,0.693,0.852,0 -411,1.871,0.596,0.431,0.725,0.763,0.485,0 -412,1.160,0.446,0.479,0.771,0.000,0.629,0 -413,1.868,0.747,0.506,0.904,0.211,0.690,0 -414,1.615,0.086,0.894,0.220,0.357,0.153,0 -415,1.190,0.702,0.062,0.398,0.732,0.082,0 -416,1.786,0.100,0.931,0.644,0.130,0.291,0 -417,1.386,0.380,0.513,0.027,0.065,0.363,0 -418,1.203,0.155,0.591,0.816,0.434,0.857,0 -419,1.485,0.788,0.755,0.454,0.170,0.330,0 -420,1.201,0.984,0.923,0.009,0.465,0.994,0 -421,1.522,0.129,0.218,0.944,0.419,0.828,0 -422,1.609,0.915,0.362,0.614,0.811,0.839,0 -423,1.023,0.545,0.326,0.149,0.121,0.328,0 -424,1.556,0.480,0.725,0.554,0.400,0.071,0 -425,1.354,0.436,0.149,0.667,0.931,0.816,0 -426,1.098,0.766,0.551,0.119,0.374,0.862,0 -427,1.673,0.321,0.643,0.426,0.586,0.042,0 -428,1.582,0.460,0.467,0.132,0.735,0.419,0 -429,1.233,0.113,0.100,0.190,0.310,0.056,0 -430,1.551,0.269,0.081,0.720,0.072,0.850,0 -431,1.367,0.526,0.553,0.767,0.463,0.193,0 -432,1.861,0.078,0.757,0.395,0.788,0.983,0 -433,1.788,0.852,0.656,0.558,0.547,0.052,0 -434,1.958,0.828,0.949,0.819,0.409,0.040,0 -435,1.904,0.194,0.542,0.210,0.479,0.094,0 -436,1.724,0.807,0.549,0.362,0.032,0.964,0 -437,1.070,0.942,0.334,0.142,0.190,0.724,0 -438,1.525,0.391,0.412,0.510,0.984,0.797,0 -439,1.828,0.251,0.645,0.471,0.233,0.575,0 -440,1.179,0.547,0.123,0.908,0.341,0.842,0 -441,1.417,0.100,0.212,0.845,0.847,0.601,0 -442,1.931,0.901,0.111,0.402,0.098,0.480,0 -443,1.997,0.053,0.724,0.448,0.413,0.479,0 -444,1.510,0.074,0.763,0.664,0.918,0.865,0 -445,1.727,0.467,0.175,0.053,0.985,0.357,0 -446,1.957,0.545,0.041,0.479,0.974,0.825,0 -447,1.706,0.936,0.764,0.079,0.769,0.649,0 -448,1.540,0.666,0.056,0.558,0.574,0.201,0 -449,1.836,0.741,0.715,0.198,0.102,0.321,0 -450,1.903,0.947,0.213,0.949,0.300,0.827,0 -451,1.289,0.421,0.444,0.439,0.177,0.320,0 -452,1.865,0.656,0.846,0.403,0.462,0.011,0 -453,1.773,0.812,0.006,0.440,0.759,0.690,0 -454,1.522,0.420,0.348,0.431,0.049,0.926,0 -455,1.243,0.849,0.283,0.554,0.008,0.379,0 -456,1.988,0.096,0.593,0.794,0.437,0.207,0 -457,1.669,0.546,0.906,0.250,0.284,0.437,0 -458,1.606,0.740,0.723,0.939,0.890,0.204,0 -459,1.080,0.690,0.715,0.792,0.117,0.567,0 -460,1.163,0.640,0.945,0.657,0.427,0.831,0 -461,1.973,0.875,0.086,0.182,0.147,0.501,0 -462,1.261,0.247,0.539,0.621,0.650,0.033,0 -463,1.378,0.786,0.107,0.627,0.984,0.881,0 -464,1.543,0.492,0.432,0.635,0.103,0.545,0 -465,1.224,0.417,0.444,0.437,0.380,0.003,0 -466,1.225,0.248,0.121,0.774,0.267,0.736,0 -467,1.467,0.950,0.315,0.109,0.942,0.826,0 -468,1.877,0.006,0.939,0.623,0.399,0.344,0 -469,1.887,0.202,0.124,0.740,0.043,0.296,0 -470,1.642,0.029,0.815,0.034,0.482,0.172,0 -471,1.579,0.424,0.952,0.344,0.248,0.503,0 -472,1.429,0.931,0.158,0.697,0.524,0.453,0 -473,1.460,0.354,0.619,0.820,0.582,0.924,0 -474,1.656,0.627,0.086,0.397,0.727,0.242,0 -475,1.274,0.555,0.678,0.676,0.199,0.029,0 -476,1.161,0.507,0.500,0.816,0.967,0.043,0 -477,1.893,0.859,0.394,0.368,0.279,0.181,0 -478,1.212,0.866,0.887,0.890,0.451,0.749,0 -479,1.377,0.567,0.371,0.857,0.221,0.353,0 -480,1.682,0.425,0.294,0.093,0.851,0.848,0 -481,1.342,0.039,0.087,0.367,0.954,0.694,0 -482,1.175,0.794,0.410,0.569,0.241,0.334,0 -483,1.901,0.787,0.778,0.945,0.386,0.155,0 -484,1.380,0.959,0.996,0.731,0.874,0.283,0 -485,1.073,0.795,0.410,0.117,0.745,0.148,0 -486,1.404,0.000,0.568,0.530,0.429,0.689,0 -487,1.767,0.398,0.375,0.527,0.473,0.043,0 -488,1.471,0.693,0.493,0.742,0.332,0.740,0 -489,1.523,0.086,0.421,0.674,0.872,0.033,0 -490,1.154,0.093,0.988,0.366,0.015,0.560,0 -491,1.292,0.051,0.670,0.766,0.532,0.863,0 -492,1.740,0.476,0.177,0.695,0.403,0.315,0 -493,1.222,0.598,0.025,0.294,0.023,0.290,0 -494,1.733,0.574,0.956,0.125,0.289,0.531,0 -495,1.116,0.845,0.568,0.246,0.102,0.295,0 -496,1.641,0.709,0.747,0.007,0.224,0.726,0 -497,1.196,0.594,0.007,0.016,0.488,0.920,0 -498,1.088,0.877,0.446,0.381,0.594,0.200,0 -499,1.343,0.518,0.015,0.139,0.177,0.261,0 -500,1.041,0.359,0.769,0.894,0.271,0.759,0 -501,1.382,0.288,0.847,0.617,0.778,0.635,0 -502,1.698,0.448,0.740,0.478,0.397,0.109,0 -503,1.149,0.445,0.468,0.830,0.151,0.357,0 -504,1.357,0.845,0.829,0.922,0.584,0.328,0 -505,1.854,0.983,0.484,0.678,0.833,0.891,0 -506,1.858,0.582,0.642,0.245,0.488,0.042,0 -507,1.130,0.223,0.938,0.026,0.303,0.148,0 -508,1.691,0.477,0.572,0.878,0.911,0.349,0 -509,1.369,0.029,0.737,0.824,0.320,0.800,0 -510,1.809,0.365,0.883,0.332,0.475,0.596,0 -511,1.402,0.351,0.472,0.840,0.137,0.434,0 -512,1.725,0.051,0.385,0.039,0.790,0.203,0 -513,1.888,0.418,0.270,0.941,0.189,0.637,0 -514,1.055,0.709,0.835,0.382,0.749,0.742,0 -515,1.820,0.443,0.738,0.812,0.264,0.527,0 -516,1.737,0.786,0.081,0.632,0.370,0.664,0 -517,1.462,0.810,0.975,0.371,0.112,0.233,0 -518,1.383,0.474,0.998,0.763,0.787,0.504,0 -519,1.558,0.176,0.892,0.428,0.324,0.432,0 -520,1.163,0.132,0.720,0.277,0.629,0.835,0 -521,1.999,0.272,0.505,0.564,0.927,0.626,0 -522,1.999,0.353,0.324,0.116,0.806,0.726,0 -523,1.077,0.088,0.687,0.586,0.412,0.462,0 -524,1.734,0.275,0.114,0.030,0.843,0.467,0 -525,1.094,0.453,0.440,0.890,0.645,0.148,0 -526,1.457,0.623,0.238,0.361,0.066,0.633,0 -527,1.428,0.695,0.381,0.024,0.871,0.895,0 -528,1.192,0.096,0.096,0.198,0.565,0.122,0 -529,1.002,0.261,0.996,0.854,0.141,0.765,0 -530,1.335,0.621,0.794,0.742,0.326,0.980,0 -531,1.057,0.310,0.355,0.611,0.242,0.822,0 -532,1.626,0.235,0.824,0.433,0.372,0.983,0 -533,1.129,0.325,0.312,0.975,0.573,0.264,0 -534,1.950,0.612,0.390,0.158,0.810,0.209,0 -535,1.290,0.690,0.022,0.986,0.295,0.970,0 -536,1.867,0.070,0.777,0.660,0.550,0.852,0 -537,1.455,0.350,0.102,0.555,0.167,0.176,0 -538,1.338,0.667,0.147,0.071,0.203,0.200,0 -539,1.640,0.104,0.688,0.457,0.344,0.859,0 -540,1.076,0.771,0.203,0.102,0.364,0.948,0 -541,1.969,0.722,0.463,0.307,0.839,0.655,0 -542,1.646,0.078,0.987,0.407,0.504,0.491,0 -543,1.383,0.197,0.842,0.972,0.864,0.501,0 -544,1.311,0.989,0.995,0.746,0.793,0.123,0 -545,1.605,0.888,0.699,0.043,0.146,0.703,0 -546,1.101,0.423,0.718,0.949,0.925,0.508,0 -547,1.450,0.121,0.934,0.631,0.283,0.857,0 -548,1.083,0.910,0.307,0.788,0.453,0.733,0 -549,1.891,0.369,0.657,0.298,0.078,0.913,0 -550,1.152,0.407,0.812,0.637,0.580,0.093,0 -551,1.406,0.796,0.216,0.627,0.024,0.381,0 -552,1.692,0.137,0.633,0.691,0.228,0.312,0 -553,1.330,0.761,0.647,0.507,0.468,0.236,0 -554,1.222,0.978,0.515,0.526,0.191,0.175,0 -555,1.583,0.995,0.199,0.006,0.490,0.147,0 -556,1.709,0.616,0.611,0.939,0.034,0.305,0 -557,1.422,0.315,0.534,0.385,0.581,0.993,0 -558,1.567,0.375,0.957,0.700,0.093,0.791,0 -559,1.542,0.539,0.602,0.219,0.767,0.003,0 -560,1.993,0.115,0.519,0.823,0.644,0.377,0 -561,1.157,0.099,0.358,0.421,0.157,0.566,0 -562,1.263,0.060,0.604,0.725,0.534,0.733,0 -563,1.561,0.711,0.645,0.776,0.301,0.774,0 -564,1.640,0.121,0.045,0.064,0.690,0.591,0 -565,1.619,0.874,0.229,0.095,0.226,0.236,0 -566,1.689,0.556,0.038,0.123,0.989,0.226,0 -567,1.943,0.772,0.891,0.003,0.409,0.429,0 -568,1.909,0.516,0.350,0.196,0.937,0.164,0 -569,1.232,0.497,0.540,0.753,0.103,0.018,0 -570,1.636,0.158,0.768,0.637,0.016,0.585,0 -571,1.152,0.339,0.649,0.763,0.959,0.903,0 -572,1.368,0.706,0.911,0.917,0.946,0.676,0 -573,1.456,0.037,0.875,0.523,0.811,0.468,0 -574,1.052,0.434,0.631,0.434,0.780,0.982,0 -575,1.061,0.504,0.630,0.675,0.940,0.067,0 -576,1.432,0.835,0.240,0.227,0.716,0.945,0 -577,1.717,0.964,0.856,0.241,0.743,0.949,0 -578,1.736,0.930,0.605,0.538,0.859,0.495,0 -579,1.747,0.756,0.264,0.819,0.873,0.681,0 -580,1.212,0.149,0.087,0.324,0.139,0.074,0 -581,1.904,0.768,0.317,0.943,0.214,0.511,0 -582,1.514,0.871,0.729,0.645,0.996,0.845,0 -583,1.653,0.059,0.072,0.030,0.613,0.992,0 -584,1.274,0.584,0.041,0.206,0.778,0.038,0 -585,1.849,0.769,0.748,0.355,0.408,0.076,0 -586,1.676,0.129,0.847,0.654,0.105,0.101,0 -587,1.398,0.297,0.827,0.563,0.238,0.936,0 -588,1.795,0.806,0.226,0.659,0.412,0.563,0 -589,1.016,0.221,0.477,0.443,0.965,0.323,0 -590,1.159,0.689,0.608,0.112,0.639,0.982,0 -591,1.147,0.438,0.255,0.056,0.178,0.147,0 -592,1.821,0.212,0.190,0.068,0.210,0.470,0 -593,1.454,0.394,0.953,0.458,0.458,0.545,0 -594,1.060,0.314,0.460,0.185,0.868,0.932,0 -595,1.079,0.485,0.251,0.414,0.718,0.471,0 -596,1.958,0.883,0.177,0.097,0.153,0.577,0 -597,1.471,0.025,0.414,0.154,0.227,0.675,0 -598,1.472,0.721,0.116,0.023,0.523,0.026,0 -599,1.484,0.829,0.209,0.749,0.271,0.915,0 -600,1.646,0.797,0.206,0.421,0.478,0.701,0 diff --git a/rdkit/ML/KNN/test_data/sample_pts.csv b/rdkit/ML/KNN/test_data/sample_pts.csv deleted file mode 100644 index 92254e8beaf..00000000000 --- a/rdkit/ML/KNN/test_data/sample_pts.csv +++ /dev/null @@ -1,13 +0,0 @@ -id,x,y,val -1,-4,3,1 -2,-3,3,1.5 -3,-4,2,1.5 -4,-3,2,1 -5,1.5,2,2 -6,3,3,1 -7,4,2,2 -8,3,1,1 -9,3,-1,-1 -10,2,-2.5,-2 -11,4,-2.5,-2 -12,3,-4,-1 diff --git a/rdkit/ML/ModelPackage/PackageUtils.py b/rdkit/ML/ModelPackage/PackageUtils.py deleted file mode 100644 index 834b6fa6ca1..00000000000 --- a/rdkit/ML/ModelPackage/PackageUtils.py +++ /dev/null @@ -1,143 +0,0 @@ -# -# Copyright (C) 2003 Rational Discovery LLC -# All rights are reserved. -# - -# from elementtree.ElementTree import ElementTree, Element, SubElement -import time -from xml.etree.ElementTree import Element, ElementTree, SubElement - - -def _ConvertModelPerformance(perf, modelPerf): - if len(modelPerf) > 3: - confMat = modelPerf[3] - accum = 0 - for row in confMat: - for entry in row: - accum += entry - accum = str(accum) - else: - confMat = None - accum = 'N/A' - - if len(modelPerf) > 4: - elem = SubElement(perf, "ScreenThreshold") - elem.text = str(modelPerf[4]) - elem = SubElement(perf, "NumScreened") - elem.text = accum - if len(modelPerf) > 4: - elem = SubElement(perf, "NumSkipped") - elem.text = str(modelPerf[6]) - elem = SubElement(perf, "Accuracy") - elem.text = str(modelPerf[0]) - elem = SubElement(perf, "AvgCorrectConf") - elem.text = str(modelPerf[1]) - elem = SubElement(perf, "AvgIncorrectConf") - elem.text = str(modelPerf[2]) - if len(modelPerf) > 4: - elem = SubElement(perf, "AvgSkipConf") - elem.text = str(modelPerf[5]) - if confMat: - elem = SubElement(perf, "ConfusionMatrix") - elem.text = str(confMat) - - -def PackageToXml(pkg, summary="N/A", trainingDataId='N/A', dataPerformance=[], - recommendedThreshold=None, classDescriptions=None, modelType=None, - modelOrganism=None): - """ generates XML for a package that follows the RD_Model.dtd - - If provided, dataPerformance should be a sequence of 2-tuples: - ( note, performance ) - where performance is of the form: - ( accuracy, avgCorrectConf, avgIncorrectConf, confusionMatrix, thresh, avgSkipConf, nSkipped ) - the last four elements are optional - - """ - head = Element("RDModelInfo") - name = SubElement(head, "ModelName") - notes = pkg.GetNotes() - if not notes: - notes = "Unnamed model" - name.text = notes - summ = SubElement(head, "ModelSummary") - summ.text = summary - calc = pkg.GetCalculator() - descrs = SubElement(head, "ModelDescriptors") - for name, summary, func in zip(calc.GetDescriptorNames(), calc.GetDescriptorSummaries(), - calc.GetDescriptorFuncs()): - descr = SubElement(descrs, "Descriptor") - elem = SubElement(descr, "DescriptorName") - elem.text = name - elem = SubElement(descr, "DescriptorDetail") - elem.text = summary - if hasattr(func, 'version'): - vers = SubElement(descr, "DescriptorVersion") - major, minor, patch = func.version.split('.') - elem = SubElement(vers, "VersionMajor") - elem.text = major - elem = SubElement(vers, "VersionMinor") - elem.text = minor - elem = SubElement(vers, "VersionPatch") - elem.text = patch - - elem = SubElement(head, "TrainingDataId") - elem.text = trainingDataId - - for description, perfData in dataPerformance: - dataNode = SubElement(head, "ValidationData") - note = SubElement(dataNode, 'ScreenNote') - note.text = description - perf = SubElement(dataNode, "PerformanceData") - _ConvertModelPerformance(perf, perfData) - - if recommendedThreshold: - elem = SubElement(head, "RecommendedThreshold") - elem.text = str(recommendedThreshold) - - if classDescriptions: - elem = SubElement(head, "ClassDescriptions") - for val, text in classDescriptions: - descr = SubElement(elem, 'ClassDescription') - valElem = SubElement(descr, 'ClassVal') - valElem.text = str(val) - valText = SubElement(descr, 'ClassText') - valText.text = str(text) - - if modelType: - elem = SubElement(head, "ModelType") - elem.text = modelType - if modelOrganism: - elem = SubElement(head, "ModelOrganism") - elem.text = modelOrganism - - hist = SubElement(head, "ModelHistory") - revision = SubElement(hist, "Revision") - tm = time.localtime() - date = SubElement(revision, "RevisionDate") - elem = SubElement(date, "Year") - elem.text = str(tm[0]) - elem = SubElement(date, "Month") - elem.text = str(tm[1]) - elem = SubElement(date, "Day") - elem.text = str(tm[2]) - note = SubElement(revision, "RevisionNote") - note.text = "Created" - return ElementTree(head) - - -if __name__ == '__main__': # pragma: nocover - import pickle - import sys - from io import StringIO - pkg = pickle.load(open(sys.argv[1], 'rb')) - perf = (.80, .95, .70, [[4, 1], [1, 4]]) - tree = PackageToXml(pkg, dataPerformance=[('training data performance', perf)]) - io = StringIO() - tree.write(io) - txt = io.getvalue() - header = """ - -""" - print(header) - print(txt.replace('><', '>\n<')) diff --git a/rdkit/ML/ModelPackage/Packager.py b/rdkit/ML/ModelPackage/Packager.py deleted file mode 100644 index db42d8839c8..00000000000 --- a/rdkit/ML/ModelPackage/Packager.py +++ /dev/null @@ -1,98 +0,0 @@ -# -# Copyright (C) 2002-2008 Greg Landrum and Rational Discovery LLC -# All rights are reserved. -# - - -class DescriptorCalculationError(Exception): - """ used to signal problems generating descriptor values """ - pass - - -class ClassificationError(Exception): - """ used to signal problems generating predictions """ - pass - - -class ModelPackage(object): - """ a container class to package a composite model with a descriptor - calculator so that objects needing predictions (compounds, molecules, etc.) - can be passed directly in without worrying about generating descriptors - - """ - - def __init__(self, descCalc=None, model=None, dataSet=None, notes=''): - self._descCalc = descCalc - self._model = model - self._notes = notes - self._dataSet = dataSet - self._initialized = 0 - self._supplementalData = [] - - def SetCalculator(self, calc): - self._descCalc = calc - - def GetCalculator(self): - return self._descCalc - - def SetModel(self, model): - self._model = model - - def GetModel(self): - return self._model - - def SetDataset(self, data): - self._dataSet = data - - def GetDataset(self): - return self._dataSet - - def SetNotes(self, notes): - self._notes = notes - - def GetNotes(self): - return self._notes - - def SetSupplementalData(self, suppD): - self._supplementalData = suppD - - def GetSupplementalData(self): - if not hasattr(self, '_supplementalData'): - self.SetSupplementalData([]) - return self._supplementalData - - def AddSupplementalData(self, data): - if not hasattr(self, '_supplementalData'): - self.SetSupplementalData([]) - self._supplementalData.append(data) - - def Classify(self, obj, label='', threshold=0): - if not self._initialized: - self.Init() - try: - descs = self._descCalc.CalcDescriptors(obj) - except Exception: - raise DescriptorCalculationError('problems encountered generating descriptors') - - argVect = [label] + list(descs) + [0] - try: - res = self._model.ClassifyExample(argVect, threshold=threshold, appendExample=0) - except Exception: - import traceback - traceback.print_exc() - raise ClassificationError('problems encountered generating prediction') - - return res - - def Init(self): - if self._model is None or self._descCalc is None: - return - - nms = self._model.GetDescriptorNames() - lbl = nms[0] - act = nms[-1] - descs = self._descCalc.GetDescriptorNames() - order = [lbl] + list(descs) + [act] - self._model.SetInputOrder(order) - - self._initialized = 1 diff --git a/rdkit/ML/ModelPackage/UnitTestPackage.py b/rdkit/ML/ModelPackage/UnitTestPackage.py deleted file mode 100644 index c0a739e1028..00000000000 --- a/rdkit/ML/ModelPackage/UnitTestPackage.py +++ /dev/null @@ -1,178 +0,0 @@ -# -# Copyright (C) 2002-2008 greg Landrum and Rational Discovery LLC -# -""" unit tests for the model and descriptor packager """ -import os -import pickle -import unittest -from io import BytesIO -from xml.dom import minidom -from xml.etree import ElementTree as ET - -from rdkit import Chem, RDConfig -from rdkit import RDRandom as random -from rdkit.Chem import Descriptors -from rdkit.ML.Composite import Composite -from rdkit.ML.Data import DataUtils -from rdkit.ML.Descriptors.MoleculeDescriptors import \ - MolecularDescriptorCalculator -from rdkit.ML.ModelPackage import Packager, PackageUtils -from rdkit.ML.ModelPackage.Packager import ModelPackage - - -def feq(a, b, tol=1e-4): - return abs(a - b) <= tol - - -class TestCase(unittest.TestCase): - - def setUp(self): - self.dataDir = os.path.join(RDConfig.RDCodeDir, 'ML/ModelPackage/test_data') - self.testD = [ - # NOTE: the confidences here can be twitchy due to changes in descriptors: - ('Fc1ccc(NC(=O)c2cccnc2Oc3cccc(c3)C(F)(F)F)c(F)c1', 0, 0.8), - # (r'CN/1(=C\C=C(/C=C1)\C\2=C\C=N(C)(Cl)\C=C2)Cl',0,0.70), - (r'NS(=O)(=O)c1cc(ccc1Cl)C2(O)NC(=O)c3ccccc32', 1, 0.70), - ] - - def _loadPackage(self): - with open(os.path.join(self.dataDir, 'Jan9_build3_pkg.pkl'), 'r') as pkgTF: - buf = pkgTF.read().replace('\r\n', '\n').encode('utf-8') - pkgTF.close() - io = BytesIO(buf) - pkg = pickle.load(io) - return pkg - - def _verify(self, pkg, testD): - for smi, pred, conf in testD: - m = Chem.MolFromSmiles(smi) - self.assertTrue(m is not None, 'SMILES: %s failed\n' % (smi)) - p, c = pkg.Classify(m) - assert p == pred, 'bad prediction (%d) for smiles %s' % (p, smi) - assert feq(c, conf), 'bad confidence (%f) for smiles %s' % (c, smi) - - def _verify2(self, pkg, testD): - for smi, pred, conf in testD: - m = Chem.MolFromSmiles(smi) - self.assertTrue(m is not None, 'SMILES: %s failed\n' % (smi)) - p, c = pkg.Classify(m) - assert p == pred, 'bad prediction (%d) for smiles %s' % (p, smi) - assert feq(c, conf), 'bad confidence (%f) for smiles %s' % (c, smi) - p, c = pkg.Classify(m) - assert p == pred, 'bad prediction (%d) for smiles %s' % (p, smi) - assert feq(c, conf), 'bad confidence (%f) for smiles %s' % (c, smi) - - def testBuild(self): - # """ tests building and screening a packager """ - with open(os.path.join(self.dataDir, 'Jan9_build3_calc.dsc'), 'r') as calcTF: - buf = calcTF.read().replace('\r\n', '\n').encode('utf-8') - calcTF.close() - calc = pickle.load(BytesIO(buf)) - with open(os.path.join(self.dataDir, 'Jan9_build3_model.pkl'), 'rb') as modelF: - model = pickle.load(modelF) - pkg = Packager.ModelPackage(descCalc=calc, model=model) - self._verify(pkg, self.testD) - - def testLoad(self): - # """ tests loading and screening a packager """ - pkg = self._loadPackage() - self._verify(pkg, self.testD) - - def testLoad2(self): - # """ tests loading and screening a packager 2 """ - pkg = self._loadPackage() - self._verify2(pkg, self.testD) - - def testPerm1(self): - # """ tests the descriptor remapping stuff in a packager """ - pkg = self._loadPackage() - calc = pkg.GetCalculator() - names = calc.GetDescriptorNames() - ref = {} - DataUtils.InitRandomNumbers((23, 42)) - for smi, _, _ in self.testD: - for desc in names: - fn = getattr(Descriptors, desc, lambda x: 777) - m = Chem.MolFromSmiles(smi) - ref[desc] = fn(m) - - for _ in range(5): - perm = list(names) - random.shuffle(perm, random=random.random) - - m = Chem.MolFromSmiles(smi) - for desc in perm: - fn = getattr(Descriptors, desc, lambda x: 777) - val = fn(m) - assert feq(val, ref[desc], - 1e-4), '%s: %s(%s): %f!=%f' % (str(perm), smi, desc, val, ref[desc]) - - def testPerm2(self): - # """ tests the descriptor remapping stuff in a packager """ - pkg = self._loadPackage() - calc = pkg.GetCalculator() - names = calc.GetDescriptorNames() - DataUtils.InitRandomNumbers((23, 42)) - perm = list(names) - random.shuffle(perm, random=random.random) - calc.simpleList = perm - calc.descriptorNames = perm - pkg.Init() - self._verify(pkg, self.testD) - - def test_ModelPackage(self): - pkg = self._loadPackage() - - self.assertTrue(isinstance(pkg.GetCalculator(), MolecularDescriptorCalculator)) - pkg.SetCalculator('calculator') - self.assertEqual(pkg.GetCalculator(), 'calculator') - - self.assertTrue(isinstance(pkg.GetModel(), Composite.Composite)) - pkg.SetModel('model') - self.assertEqual(pkg.GetModel(), 'model') - - self.assertEqual(pkg.GetDataset(), None) - pkg.SetDataset('dataset') - self.assertEqual(pkg.GetDataset(), 'dataset') - - self.assertEqual(pkg.GetNotes(), 'General purpose model built from PhysProp data') - pkg.SetNotes('notes') - self.assertEqual(pkg.GetNotes(), 'notes') - - # Here seems to be a difference between Python 2 and 3. The next assert works in Python 3, - # but fails in Python 2 - # self.assertFalse(hasattr(pkg, '_supplementalData')) - self.assertEqual(pkg.GetSupplementalData(), []) - self.assertTrue(hasattr(pkg, '_supplementalData')) - - delattr(pkg, '_supplementalData') - pkg.AddSupplementalData('supp1') - self.assertTrue(hasattr(pkg, '_supplementalData')) - self.assertEqual(pkg.GetSupplementalData(), ['supp1']) - pkg.AddSupplementalData('supp2') - self.assertEqual(pkg.GetSupplementalData(), ['supp1', 'supp2']) - - pkg = ModelPackage() - self.assertFalse(pkg._initialized) - pkg.Init() - self.assertFalse(pkg._initialized) - - def test_PackageUtils(self): - pkg = self._loadPackage() - xml = PackageUtils.PackageToXml( - pkg, dataPerformance=[ - ('label', ['accuracy', 'avgCorrect', 'avgIncorrect']), - ], recommendedThreshold=0.2, classDescriptions=[('a', 'texta'), ('b', 'textb')], - modelType='model type', modelOrganism='model organism') - s = prettyXML(xml.getroot()) - self.assertIn('', s) - - -def prettyXML(xml): - s = ET.tostring(xml, encoding='utf-8') - tree = minidom.parseString(s) - return tree.toprettyxml(indent=' ') - - -if __name__ == '__main__': # pragma: nocover - unittest.main() diff --git a/rdkit/ML/ModelPackage/__init__.py b/rdkit/ML/ModelPackage/__init__.py deleted file mode 100644 index eeba8e62bbd..00000000000 --- a/rdkit/ML/ModelPackage/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -from warnings import warn - -warn('This module is deprecated and will be removed in the 2024.03 release', DeprecationWarning, - stacklevel=2) diff --git a/rdkit/ML/ModelPackage/test_data/Jan9_build3_calc.dsc b/rdkit/ML/ModelPackage/test_data/Jan9_build3_calc.dsc deleted file mode 100644 index d49de69b245..00000000000 --- a/rdkit/ML/ModelPackage/test_data/Jan9_build3_calc.dsc +++ /dev/null @@ -1,61 +0,0 @@ -(irdkit.ML.Descriptors.MoleculeDescriptors -MolecularDescriptorCalculator -p1 -(dp2 -S'simpleList' -p3 -(lp4 -S'BalabanJ' -p5 -aS'BertzCT' -p6 -aS'GordonScantleburyB2' -p7 -aS'HallKierAlpha' -p8 -aS'Kappa3' -p9 -aS'SuperPendenticIndex' -p10 -aS'PEOE_VSA8' -p11 -aS'PEOE_VSA9' -p12 -aS'SMR_VSA7' -p13 -aS'SlogP_VSA1' -p14 -aS'SlogP_VSA3' -p15 -aS'SlogP_VSA6' -p16 -aS'TPSA' -p17 -aS'NumHAcceptors' -p18 -aS'NumHDonors' -p19 -aS'MolLogP' -p20 -asS'descriptorNames' -p21 -(lp22 -g5 -ag6 -ag7 -ag8 -ag9 -ag10 -ag11 -ag12 -ag13 -ag14 -ag15 -ag16 -ag17 -ag18 -ag19 -ag20 -asS'compoundList' -p23 -Nsb. \ No newline at end of file diff --git a/rdkit/ML/ModelPackage/test_data/Jan9_build3_model.pkl b/rdkit/ML/ModelPackage/test_data/Jan9_build3_model.pkl deleted file mode 100644 index 0c9e8b3d49f..00000000000 Binary files a/rdkit/ML/ModelPackage/test_data/Jan9_build3_model.pkl and /dev/null differ diff --git a/rdkit/ML/ModelPackage/test_data/Jan9_build3_pkg.pkl b/rdkit/ML/ModelPackage/test_data/Jan9_build3_pkg.pkl deleted file mode 100644 index 90e524f07f1..00000000000 --- a/rdkit/ML/ModelPackage/test_data/Jan9_build3_pkg.pkl +++ /dev/null @@ -1,5178 +0,0 @@ -(irdkit.ML.ModelPackage.Packager -ModelPackage -p1 -(dp2 -S'_model' -p3 -(irdkit.ML.Composite.Composite -Composite -p4 -(dp5 -S'_mapOrder' -(lp6 -I0 -aI1 -aI2 -aI3 -aI4 -aI5 -aI6 -aI7 -aI8 -aI9 -aI10 -aI11 -aI12 -aI13 -aI14 -aI15 -aI16 -aI17 -asS'_descNames' -(lp7 -S'CAS' -aS'BALABANJ' -p8 -aS'BERTZCT' -p9 -aS'GORDONSCANTLEBURYB2' -p10 -aS'HALLKIERALPHA' -p11 -aS'KAPPA3' -p12 -aS'SUPERPENDENTICINDEX' -p13 -aS'PEOE_VSA8' -p14 -aS'PEOE_VSA9' -p15 -aS'SMR_VSA7' -p16 -aS'SLOGP_VSA1' -p17 -aS'SLOGP_VSA3' -p18 -aS'SLOGP_VSA6' -p19 -aS'TPSA' -p20 -aS'NUMHACCEPTORS' -p21 -aS'NUMHDONORS' -p22 -aS'MOLLOGP' -p23 -aS'SOLUBILITY_52' -asS'modelVotes' -(lp24 -I1 -aI1 -aI1 -aI1 -aI1 -aI1 -aI0 -aI1 -aI0 -aI0 -asS'modelList' -(lp25 -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp26 -S'_nResultCodes' -p27 -I2 -sS'testExamples' -p28 -(lsS'parent' -p29 -NsS'level' -p30 -I0 -sS'qBounds' -p31 -(lp32 -F3.31385 -asS'badExamples' -p33 -(lsS'label' -p34 -I16 -sS'_gridName' -p35 -S'Model 0' -sS'terminalNode' -p36 -I0 -sS'trainingExamples' -p37 -(lsS'examples' -p38 -(lsS'_varNames' -p39 -g7 -sS'data' -p40 -F0.11667328488530351 -sS'children' -p41 -(lp42 -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp43 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lp44 -F558.15667155000006 -asg33 -(lsg34 -I2 -sg36 -I0 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lp45 -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp46 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lp47 -F28.851845480000001 -asg33 -(lsg34 -I8 -sg36 -I0 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lp48 -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp49 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lsg33 -(lsg34 -I1 -sg36 -I1 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lsS'name' -p50 -S'1' -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp51 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lsg33 -(lsg34 -I0 -sg36 -I1 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lsg50 -S'0' -sbasg50 -g15 -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp52 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lp53 -F1.5 -asg33 -(lsg34 -I15 -sg36 -I0 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lp54 -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp55 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lp56 -F56.801783534999998 -asg33 -(lsg34 -I7 -sg36 -I0 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lp57 -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp58 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lsg33 -(lsg34 -I0 -sg36 -I1 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lsg50 -S'0?' -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp59 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lsg33 -(lsg34 -I1 -sg36 -I1 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lsg50 -S'1' -sbasg50 -g14 -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp60 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lp61 -F12.274867024999999 -asg33 -(lsg34 -I11 -sg36 -I0 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lp62 -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp63 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lsg33 -(lsg34 -I1 -sg36 -I1 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lsg50 -S'1?' -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp64 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lsg33 -(lsg34 -I1 -sg36 -I1 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lsg50 -S'1' -sbasg50 -g18 -sbasg50 -g22 -sbasg50 -g9 -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp65 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lp66 -F5.6921478785000001 -asg33 -(lsg34 -I11 -sg36 -I0 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lp67 -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp68 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lp69 -F10.407385399999999 -asg33 -(lsg34 -I8 -sg36 -I0 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lp70 -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp71 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lsg33 -(lsg34 -I1 -sg36 -I1 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lsg50 -S'1' -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp72 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lp73 -F1210.7471745 -asg33 -(lsg34 -I2 -sg36 -I0 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lp74 -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp75 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lsg33 -(lsg34 -I0 -sg36 -I1 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lsg50 -S'0' -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp76 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lsg33 -(lsg34 -I1 -sg36 -I1 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lsg50 -S'1' -sbasg50 -g9 -sbasg50 -g15 -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp77 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lp78 -F826.27129644999991 -asg33 -(lsg34 -I2 -sg36 -I0 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lp79 -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp80 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lp81 -F1.5339912280000001 -asg33 -(lsg34 -I3 -sg36 -I0 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lp82 -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp83 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lsg33 -(lsg34 -I1 -sg36 -I1 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lsg50 -S'1?' -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp84 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lsg33 -(lsg34 -I0 -sg36 -I1 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lsg50 -S'0?' -sbasg50 -g10 -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp85 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lp86 -F60.500482730000002 -asg33 -(lsg34 -I7 -sg36 -I0 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lp87 -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp88 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lsg33 -(lsg34 -I1 -sg36 -I1 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lsg50 -S'1' -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp89 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lsg33 -(lsg34 -I0 -sg36 -I1 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lsg50 -S'0' -sbasg50 -g14 -sbasg50 -g9 -sbasg50 -g18 -sbasg50 -g23 -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp90 -g27 -I2 -sg28 -(lsg29 -Nsg30 -I0 -sg31 -(lp91 -F3.3087999999999997 -asg33 -(lsg34 -I16 -sg35 -S'Model 1' -sg36 -I0 -sg37 -(lsg38 -(lsg39 -g7 -sg40 -F0.12763620509670603 -sg41 -(lp92 -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp93 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lp94 -F1.6006441224999999 -asg33 -(lsg34 -I3 -sg36 -I0 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lp95 -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp96 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lp97 -F24.260892545000001 -asg33 -(lsg34 -I12 -sg36 -I0 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lp98 -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp99 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lp100 -F169.23205915 -asg33 -(lsg34 -I6 -sg36 -I0 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lp101 -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp102 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lsg33 -(lsg34 -I0 -sg36 -I1 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lsg50 -S'0?' -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp103 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lsg33 -(lsg34 -I1 -sg36 -I1 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lsg50 -S'1' -sbasg50 -g13 -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp104 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lsg33 -(lsg34 -I1 -sg36 -I1 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lsg50 -S'1' -sbasg50 -g19 -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp105 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lp106 -F60.824333304999996 -asg33 -(lsg34 -I9 -sg36 -I0 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lp107 -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp108 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lp109 -F2.9859810365000001 -asg33 -(lsg34 -I5 -sg36 -I0 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lp110 -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp111 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lsg33 -(lsg34 -I1 -sg36 -I1 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lsg50 -S'1?' -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp112 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lsg33 -(lsg34 -I0 -sg36 -I1 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lsg50 -S'0' -sbasg50 -g12 -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp113 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lp114 -F548.60097029999997 -asg33 -(lsg34 -I6 -sg36 -I0 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lp115 -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp116 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lsg33 -(lsg34 -I0 -sg36 -I1 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lsg50 -S'0?' -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp117 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lsg33 -(lsg34 -I1 -sg36 -I1 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lsg50 -S'1' -sbasg50 -g13 -sbasg50 -g16 -sbasg50 -g10 -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp118 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lp119 -F12.59365536 -asg33 -(lsg34 -I11 -sg36 -I0 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lp120 -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp121 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lp122 -F1210.7471745 -asg33 -(lsg34 -I2 -sg36 -I0 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lp123 -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp124 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lp125 -F1.5108695650000001 -asg33 -(lsg34 -I3 -sg36 -I0 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lp126 -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp127 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lsg33 -(lsg34 -I0 -sg36 -I1 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lsg50 -S'0?' -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp128 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lsg33 -(lsg34 -I0 -sg36 -I1 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lsg50 -S'0?' -sbasg50 -g10 -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp129 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lsg33 -(lsg34 -I1 -sg36 -I1 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lsg50 -S'1' -sbasg50 -g9 -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp130 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lp131 -F589.88781184999993 -asg33 -(lsg34 -I2 -sg36 -I0 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lp132 -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp133 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lsg33 -(lsg34 -I1 -sg36 -I1 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lsg50 -S'1' -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp134 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lp135 -F20.78888839 -asg33 -(lsg34 -I7 -sg36 -I0 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lp136 -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp137 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lsg33 -(lsg34 -I1 -sg36 -I1 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lsg50 -S'1' -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp138 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lsg33 -(lsg34 -I0 -sg36 -I1 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lsg50 -S'0?' -sbasg50 -g14 -sbasg50 -g9 -sbasg50 -g18 -sbasg50 -g23 -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp139 -g27 -I2 -sg28 -(lsg29 -Nsg30 -I0 -sg31 -(lp140 -F66.932736759999997 -asg33 -(lsg34 -I9 -sg35 -S'Model 2' -sg36 -I0 -sg37 -(lsg38 -(lsg39 -g7 -sg40 -F0.14566757026571775 -sg41 -(lp141 -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp142 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lp143 -F538.79279855000004 -asg33 -(lsg34 -I2 -sg36 -I0 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lp144 -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp145 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lp146 -F6.5758191855000003 -asg33 -(lsg34 -I10 -sg36 -I0 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lp147 -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp148 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lsg33 -(lsg34 -I1 -sg36 -I1 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lsg50 -S'1' -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp149 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lp150 -F23.493225514999999 -asg33 -(lsg34 -I12 -sg36 -I0 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lp151 -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp152 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lsg33 -(lsg34 -I0 -sg36 -I1 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lsg50 -S'0?' -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp153 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lsg33 -(lsg34 -I1 -sg36 -I1 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lsg50 -S'1' -sbasg50 -g19 -sbasg50 -g17 -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp154 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lp155 -F53.250694754999998 -asg33 -(lsg34 -I7 -sg36 -I0 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lp156 -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp157 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lp158 -F5.5 -asg33 -(lsg34 -I14 -sg36 -I0 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lp159 -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp160 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lsg33 -(lsg34 -I1 -sg36 -I1 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lsg50 -S'1?' -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp161 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lsg33 -(lsg34 -I1 -sg36 -I1 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lsg50 -S'1' -sbasg50 -g21 -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp162 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lp163 -F25.198875170000001 -asg33 -(lsg34 -I8 -sg36 -I0 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lp164 -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp165 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lsg33 -(lsg34 -I0 -sg36 -I1 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lsg50 -S'0' -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp166 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lsg33 -(lsg34 -I1 -sg36 -I1 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lsg50 -S'1' -sbasg50 -g15 -sbasg50 -g14 -sbasg50 -g9 -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp167 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lp168 -F82.900000000000006 -asg33 -(lsg34 -I13 -sg36 -I0 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lp169 -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp170 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lp171 -F46.730982259999998 -asg33 -(lsg34 -I12 -sg36 -I0 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lp172 -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp173 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lsg33 -(lsg34 -I0 -sg36 -I1 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lsg50 -S'0' -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp174 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lp175 -F556.98071164999999 -asg33 -(lsg34 -I2 -sg36 -I0 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lp176 -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp177 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lsg33 -(lsg34 -I1 -sg36 -I1 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lsg50 -S'1' -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp178 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lsg33 -(lsg34 -I0 -sg36 -I1 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lsg50 -S'0?' -sbasg50 -g9 -sbasg50 -g19 -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp179 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lp180 -F455.00025270000003 -asg33 -(lsg34 -I6 -sg36 -I0 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lp181 -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp182 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lp183 -F1.7704678364999999 -asg33 -(lsg34 -I3 -sg36 -I0 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lp184 -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp185 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lsg33 -(lsg34 -I1 -sg36 -I1 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lsg50 -S'1?' -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp186 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lsg33 -(lsg34 -I0 -sg36 -I1 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lsg50 -S'0' -sbasg50 -g10 -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp187 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lsg33 -(lsg34 -I1 -sg36 -I1 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lsg50 -S'1' -sbasg50 -g13 -sbasg50 -g20 -sbasg50 -g16 -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp188 -g27 -I2 -sg28 -(lsg29 -Nsg30 -I0 -sg31 -(lp189 -F1.5108695650000001 -asg33 -(lsg34 -I3 -sg35 -S'Model 3' -sg36 -I0 -sg37 -(lsg38 -(lsg39 -g7 -sg40 -F0.12284360759996857 -sg41 -(lp190 -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp191 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lp192 -F54.180088325 -asg33 -(lsg34 -I9 -sg36 -I0 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lp193 -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp194 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lsg33 -(lsg34 -I1 -sg36 -I1 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lsg50 -S'1' -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp195 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lp196 -F56.70721588 -asg33 -(lsg34 -I6 -sg36 -I0 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lp197 -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp198 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lsg33 -(lsg34 -I1 -sg36 -I1 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lsg50 -S'1' -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp199 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lp200 -F3.27718 -asg33 -(lsg34 -I16 -sg36 -I0 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lp201 -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp202 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lsg33 -(lsg34 -I1 -sg36 -I1 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lsg50 -S'1?' -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp203 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lsg33 -(lsg34 -I0 -sg36 -I1 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lsg50 -S'0?' -sbasg50 -g23 -sbasg50 -g13 -sbasg50 -g16 -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp204 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lp205 -F3.3264100000000001 -asg33 -(lsg34 -I16 -sg36 -I0 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lp206 -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp207 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lp208 -F9.1075618985000002 -asg33 -(lsg34 -I10 -sg36 -I0 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lp209 -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp210 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lp211 -F29.123174034999998 -asg33 -(lsg34 -I8 -sg36 -I0 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lp212 -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp213 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lsg33 -(lsg34 -I1 -sg36 -I1 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lsg50 -S'1?' -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp214 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lsg33 -(lsg34 -I0 -sg36 -I1 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lsg50 -S'0' -sbasg50 -g15 -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp215 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lp216 -F-1.645 -asg33 -(lsg34 -I4 -sg36 -I0 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lp217 -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp218 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lsg33 -(lsg34 -I0 -sg36 -I1 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lsg50 -S'0?' -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp219 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lsg33 -(lsg34 -I1 -sg36 -I1 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lsg50 -S'1' -sbasg50 -g11 -sbasg50 -g17 -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp220 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lp221 -F875.21043505 -asg33 -(lsg34 -I2 -sg36 -I0 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lp222 -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp223 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lp224 -F2.7149018900000002 -asg33 -(lsg34 -I5 -sg36 -I0 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lp225 -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp226 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lsg33 -(lsg34 -I0 -sg36 -I1 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lsg50 -S'0?' -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp227 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lsg33 -(lsg34 -I0 -sg36 -I1 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lsg50 -S'0?' -sbasg50 -g12 -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp228 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lp229 -F3.8608328950000002 -asg33 -(lsg34 -I5 -sg36 -I0 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lp230 -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp231 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lsg33 -(lsg34 -I1 -sg36 -I1 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lsg50 -S'1' -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp232 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lsg33 -(lsg34 -I0 -sg36 -I1 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lsg50 -S'0?' -sbasg50 -g12 -sbasg50 -g9 -sbasg50 -g23 -sbasg50 -g10 -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp233 -g27 -I2 -sg28 -(lsg29 -Nsg30 -I0 -sg31 -(lp234 -F3.3087999999999997 -asg33 -(lsg34 -I16 -sg35 -S'Model 4' -sg36 -I0 -sg37 -(lsg38 -(lsg39 -g7 -sg40 -F0.17211888908149761 -sg41 -(lp235 -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp236 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lp237 -F522.61940019999997 -asg33 -(lsg34 -I2 -sg36 -I0 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lp238 -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp239 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lp240 -F28.981998660000002 -asg33 -(lsg34 -I8 -sg36 -I0 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lp241 -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp242 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lsg33 -(lsg34 -I1 -sg36 -I1 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lsg50 -S'1' -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp243 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lsg33 -(lsg34 -I0 -sg36 -I1 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lsg50 -S'0' -sbasg50 -g15 -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp244 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lp245 -F20.108317755000002 -asg33 -(lsg34 -I11 -sg36 -I0 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lp246 -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp247 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lp248 -F455.00025270000003 -asg33 -(lsg34 -I6 -sg36 -I0 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lp249 -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp250 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lsg33 -(lsg34 -I0 -sg36 -I1 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lsg50 -S'0?' -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp251 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lsg33 -(lsg34 -I1 -sg36 -I1 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lsg50 -S'1?' -sbasg50 -g13 -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp252 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lsg33 -(lsg34 -I1 -sg36 -I1 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lsg50 -S'1' -sbasg50 -g18 -sbasg50 -g9 -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp253 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lp254 -F52.806064840000005 -asg33 -(lsg34 -I9 -sg36 -I0 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lp255 -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp256 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lp257 -F523.42567700000006 -asg33 -(lsg34 -I2 -sg36 -I0 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lp258 -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp259 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lsg33 -(lsg34 -I1 -sg36 -I1 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lsg50 -S'1' -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp260 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lp261 -F53.250694754999998 -asg33 -(lsg34 -I7 -sg36 -I0 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lp262 -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp263 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lsg33 -(lsg34 -I1 -sg36 -I1 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lsg50 -S'1' -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp264 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lsg33 -(lsg34 -I0 -sg36 -I1 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lsg50 -S'0' -sbasg50 -g14 -sbasg50 -g9 -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp265 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lp266 -F791.7438214 -asg33 -(lsg34 -I2 -sg36 -I0 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lp267 -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp268 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lp269 -F50.003077590000004 -asg33 -(lsg34 -I12 -sg36 -I0 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lp270 -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp271 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lsg33 -(lsg34 -I0 -sg36 -I1 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lsg50 -S'0' -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp272 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lsg33 -(lsg34 -I0 -sg36 -I1 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lsg50 -S'0?' -sbasg50 -g19 -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp273 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lp274 -F24.551471185 -asg33 -(lsg34 -I7 -sg36 -I0 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lp275 -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp276 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lsg33 -(lsg34 -I1 -sg36 -I1 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lsg50 -S'1' -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp277 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lsg33 -(lsg34 -I0 -sg36 -I1 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lsg50 -S'0?' -sbasg50 -g14 -sbasg50 -g9 -sbasg50 -g16 -sbasg50 -g23 -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp278 -g27 -I2 -sg28 -(lsg29 -Nsg30 -I0 -sg31 -(lp279 -F67.618558984999993 -asg33 -(lsg34 -I9 -sg35 -S'Model 5' -sg36 -I0 -sg37 -(lsg38 -(lsg39 -g7 -sg40 -F0.14085807453205829 -sg41 -(lp280 -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp281 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lp282 -F537.83317065000006 -asg33 -(lsg34 -I2 -sg36 -I0 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lp283 -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp284 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lp285 -F6.5758191855000003 -asg33 -(lsg34 -I10 -sg36 -I0 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lp286 -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp287 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lsg33 -(lsg34 -I1 -sg36 -I1 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lsg50 -S'1' -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp288 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lp289 -F40.144999999999996 -asg33 -(lsg34 -I13 -sg36 -I0 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lp290 -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp291 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lsg33 -(lsg34 -I0 -sg36 -I1 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lsg50 -S'0' -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp292 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lsg33 -(lsg34 -I1 -sg36 -I1 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lsg50 -S'1' -sbasg50 -g20 -sbasg50 -g17 -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp293 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lp294 -F53.187173690000002 -asg33 -(lsg34 -I7 -sg36 -I0 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lp295 -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp296 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lp297 -F2.6486323834999999 -asg33 -(lsg34 -I5 -sg36 -I0 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lp298 -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp299 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lsg33 -(lsg34 -I1 -sg36 -I1 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lsg50 -S'1' -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp300 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lsg33 -(lsg34 -I1 -sg36 -I1 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lsg50 -S'1?' -sbasg50 -g12 -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp301 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lp302 -F118.51628705 -asg33 -(lsg34 -I6 -sg36 -I0 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lp303 -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp304 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lsg33 -(lsg34 -I1 -sg36 -I1 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lsg50 -S'1' -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp305 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lsg33 -(lsg34 -I0 -sg36 -I1 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lsg50 -S'0' -sbasg50 -g13 -sbasg50 -g14 -sbasg50 -g9 -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp306 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lp307 -F2.0152541385 -asg33 -(lsg34 -I5 -sg36 -I0 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lp308 -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp309 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lsg33 -(lsg34 -I0 -sg36 -I1 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lsg50 -S'0' -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp310 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lp311 -F82.314999999999998 -asg33 -(lsg34 -I13 -sg36 -I0 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lp312 -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp313 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lp314 -F1.5355392160000001 -asg33 -(lsg34 -I3 -sg36 -I0 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lp315 -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp316 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lsg33 -(lsg34 -I1 -sg36 -I1 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lsg50 -S'1?' -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp317 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lsg33 -(lsg34 -I0 -sg36 -I1 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lsg50 -S'0?' -sbasg50 -g10 -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp318 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lp319 -F2.3972685920000001 -asg33 -(lsg34 -I10 -sg36 -I0 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lp320 -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp321 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lsg33 -(lsg34 -I0 -sg36 -I1 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lsg50 -S'0' -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp322 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lsg33 -(lsg34 -I1 -sg36 -I1 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lsg50 -S'1?' -sbasg50 -g17 -sbasg50 -g20 -sbasg50 -g12 -sbasg50 -g16 -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp323 -g27 -I2 -sg28 -(lsg29 -Nsg30 -I0 -sg31 -(lp324 -F69.796199099999995 -asg33 -(lsg34 -I9 -sg35 -S'Model 6' -sg36 -I0 -sg37 -(lsg38 -(lsg39 -g7 -sg40 -F0.11684335588896563 -sg41 -(lp325 -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp326 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lp327 -F538.79279855000004 -asg33 -(lsg34 -I2 -sg36 -I0 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lp328 -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp329 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lp330 -F6.5758191855000003 -asg33 -(lsg34 -I10 -sg36 -I0 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lp331 -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp332 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lsg33 -(lsg34 -I1 -sg36 -I1 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lsg50 -S'1' -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp333 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lp334 -F23.493225514999999 -asg33 -(lsg34 -I12 -sg36 -I0 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lp335 -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp336 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lsg33 -(lsg34 -I0 -sg36 -I1 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lsg50 -S'0?' -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp337 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lsg33 -(lsg34 -I1 -sg36 -I1 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lsg50 -S'1' -sbasg50 -g19 -sbasg50 -g17 -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp338 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lp339 -F87.310000000000002 -asg33 -(lsg34 -I13 -sg36 -I0 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lp340 -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp341 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lp342 -F2.6431706960000003 -asg33 -(lsg34 -I5 -sg36 -I0 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lp343 -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp344 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lsg33 -(lsg34 -I1 -sg36 -I1 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lsg50 -S'1?' -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp345 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lsg33 -(lsg34 -I0 -sg36 -I1 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lsg50 -S'0?' -sbasg50 -g12 -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp346 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lp347 -F3.1626599999999998 -asg33 -(lsg34 -I16 -sg36 -I0 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lp348 -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp349 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lsg33 -(lsg34 -I1 -sg36 -I1 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lsg50 -S'1' -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp350 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lsg33 -(lsg34 -I0 -sg36 -I1 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lsg50 -S'0?' -sbasg50 -g23 -sbasg50 -g20 -sbasg50 -g9 -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp351 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lp352 -F1.7576345600000001 -asg33 -(lsg34 -I5 -sg36 -I0 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lp353 -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp354 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lsg33 -(lsg34 -I0 -sg36 -I1 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lsg50 -S'0' -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp355 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lp356 -F-3.2599999999999998 -asg33 -(lsg34 -I4 -sg36 -I0 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lp357 -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp358 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lp359 -F8.6584432215000007 -asg33 -(lsg34 -I10 -sg36 -I0 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lp360 -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp361 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lsg33 -(lsg34 -I1 -sg36 -I1 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lsg50 -S'1' -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp362 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lsg33 -(lsg34 -I0 -sg36 -I1 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lsg50 -S'0?' -sbasg50 -g17 -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp363 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lp364 -F2.4208487745 -asg33 -(lsg34 -I1 -sg36 -I0 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lp365 -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp366 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lsg33 -(lsg34 -I0 -sg36 -I1 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lsg50 -S'0?' -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp367 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lsg33 -(lsg34 -I0 -sg36 -I1 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lsg50 -S'0' -sbasg50 -g8 -sbasg50 -g11 -sbasg50 -g12 -sbasg50 -g16 -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp368 -g27 -I2 -sg28 -(lsg29 -Nsg30 -I0 -sg31 -(lp369 -F3.3264100000000001 -asg33 -(lsg34 -I16 -sg35 -S'Model 7' -sg36 -I0 -sg37 -(lsg38 -(lsg39 -g7 -sg40 -F0.13011121118740684 -sg41 -(lp370 -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp371 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lp372 -F59.412089965 -asg33 -(lsg34 -I9 -sg36 -I0 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lp373 -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp374 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lp375 -F558.15667155000006 -asg33 -(lsg34 -I2 -sg36 -I0 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lp376 -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp377 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lsg33 -(lsg34 -I1 -sg36 -I1 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lsg50 -S'1' -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp378 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lp379 -F2.6476037235000001 -asg33 -(lsg34 -I5 -sg36 -I0 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lp380 -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp381 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lsg33 -(lsg34 -I1 -sg36 -I1 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lsg50 -S'1' -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp382 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lsg33 -(lsg34 -I1 -sg36 -I1 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lsg50 -S'1?' -sbasg50 -g12 -sbasg50 -g9 -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp383 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lp384 -F35.77103469 -asg33 -(lsg34 -I12 -sg36 -I0 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lp385 -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp386 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lp387 -F2.7966441944999998 -asg33 -(lsg34 -I1 -sg36 -I0 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lp388 -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp389 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lsg33 -(lsg34 -I0 -sg36 -I1 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lsg50 -S'0?' -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp390 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lsg33 -(lsg34 -I1 -sg36 -I1 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lsg50 -S'1' -sbasg50 -g8 -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp391 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lp392 -F580.67213100000004 -asg33 -(lsg34 -I2 -sg36 -I0 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lp393 -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp394 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lsg33 -(lsg34 -I1 -sg36 -I1 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lsg50 -S'1' -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp395 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lsg33 -(lsg34 -I1 -sg36 -I1 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lsg50 -S'1?' -sbasg50 -g9 -sbasg50 -g19 -sbasg50 -g16 -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp396 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lp397 -F6.2055325650000004 -asg33 -(lsg34 -I5 -sg36 -I0 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lp398 -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp399 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lp400 -F116.985 -asg33 -(lsg34 -I13 -sg36 -I0 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lp401 -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp402 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lp403 -F2.4377605129999997 -asg33 -(lsg34 -I1 -sg36 -I0 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lp404 -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp405 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lsg33 -(lsg34 -I0 -sg36 -I1 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lsg50 -S'0?' -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp406 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lsg33 -(lsg34 -I0 -sg36 -I1 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lsg50 -S'0' -sbasg50 -g8 -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp407 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lsg33 -(lsg34 -I1 -sg36 -I1 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lsg50 -S'1' -sbasg50 -g20 -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp408 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lsg33 -(lsg34 -I1 -sg36 -I1 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lsg50 -S'1' -sbasg50 -g12 -sbasg50 -g23 -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp409 -g27 -I2 -sg28 -(lsg29 -Nsg30 -I0 -sg31 -(lp410 -F18.028982294999999 -asg33 -(lsg34 -I11 -sg35 -S'Model 8' -sg36 -I0 -sg37 -(lsg38 -(lsg39 -g7 -sg40 -F0.14836268866751995 -sg41 -(lp411 -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp412 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lp413 -F3.3087999999999997 -asg33 -(lsg34 -I16 -sg36 -I0 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lp414 -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp415 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lp416 -F521.05756450000001 -asg33 -(lsg34 -I2 -sg36 -I0 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lp417 -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp418 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lsg33 -(lsg34 -I1 -sg36 -I1 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lsg50 -S'1' -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp419 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lp420 -F1.630681818 -asg33 -(lsg34 -I3 -sg36 -I0 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lp421 -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp422 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lsg33 -(lsg34 -I1 -sg36 -I1 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lsg50 -S'1?' -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp423 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lsg33 -(lsg34 -I0 -sg36 -I1 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lsg50 -S'0?' -sbasg50 -g10 -sbasg50 -g9 -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp424 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lp425 -F1.751354243 -asg33 -(lsg34 -I5 -sg36 -I0 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lp426 -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp427 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lsg33 -(lsg34 -I0 -sg36 -I1 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lsg50 -S'0' -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp428 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lp429 -F512.33654494999996 -asg33 -(lsg34 -I2 -sg36 -I0 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lp430 -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp431 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lsg33 -(lsg34 -I1 -sg36 -I1 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lsg50 -S'1?' -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp432 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lsg33 -(lsg34 -I0 -sg36 -I1 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lsg50 -S'0?' -sbasg50 -g9 -sbasg50 -g12 -sbasg50 -g23 -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp433 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lp434 -F372.42855120000002 -asg33 -(lsg34 -I6 -sg36 -I0 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lp435 -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp436 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lsg33 -(lsg34 -I1 -sg36 -I1 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lsg50 -S'1' -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp437 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lp438 -F-1.3149999999999999 -asg33 -(lsg34 -I4 -sg36 -I0 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lp439 -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp440 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lsg33 -(lsg34 -I1 -sg36 -I1 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lsg50 -S'1' -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp441 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lp442 -F2.7789246754999999 -asg33 -(lsg34 -I1 -sg36 -I0 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lp443 -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp444 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lsg33 -(lsg34 -I0 -sg36 -I1 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lsg50 -S'0' -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp445 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lsg33 -(lsg34 -I1 -sg36 -I1 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lsg50 -S'1' -sbasg50 -g8 -sbasg50 -g11 -sbasg50 -g13 -sbasg50 -g18 -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp446 -g27 -I2 -sg28 -(lsg29 -Nsg30 -I0 -sg31 -(lp447 -F3.3087999999999997 -asg33 -(lsg34 -I16 -sg35 -S'Model 9' -sg36 -I0 -sg37 -(lsg38 -(lsg39 -g7 -sg40 -F0.1527774273164004 -sg41 -(lp448 -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp449 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lp450 -F522.61940019999997 -asg33 -(lsg34 -I2 -sg36 -I0 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lp451 -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp452 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lsg33 -(lsg34 -I1 -sg36 -I1 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lsg50 -S'1' -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp453 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lp454 -F20.108317755000002 -asg33 -(lsg34 -I11 -sg36 -I0 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lp455 -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp456 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lp457 -F-3.1399999999999997 -asg33 -(lsg34 -I4 -sg36 -I0 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lp458 -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp459 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lsg33 -(lsg34 -I1 -sg36 -I1 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lsg50 -S'1' -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp460 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lsg33 -(lsg34 -I0 -sg36 -I1 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lsg50 -S'0?' -sbasg50 -g11 -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp461 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lsg33 -(lsg34 -I1 -sg36 -I1 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lsg50 -S'1' -sbasg50 -g18 -sbasg50 -g9 -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp462 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lp463 -F13.722976790000001 -asg33 -(lsg34 -I11 -sg36 -I0 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lp464 -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp465 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lp466 -F0.5 -asg33 -(lsg34 -I15 -sg36 -I0 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lp467 -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp468 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lp469 -F2.5671851975000002 -asg33 -(lsg34 -I5 -sg36 -I0 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lp470 -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp471 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lsg33 -(lsg34 -I0 -sg36 -I1 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lsg50 -S'0' -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp472 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lsg33 -(lsg34 -I0 -sg36 -I1 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lsg50 -S'0?' -sbasg50 -g12 -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp473 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lp474 -F10.959410784999999 -asg33 -(lsg34 -I10 -sg36 -I0 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lp475 -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp476 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lsg33 -(lsg34 -I0 -sg36 -I1 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lsg50 -S'0' -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp477 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lsg33 -(lsg34 -I0 -sg36 -I1 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lsg50 -S'0?' -sbasg50 -g17 -sbasg50 -g22 -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp478 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lp479 -F1.5527272724999999 -asg33 -(lsg34 -I3 -sg36 -I0 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lp480 -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp481 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lsg33 -(lsg34 -I1 -sg36 -I1 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lsg50 -S'1' -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp482 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lp483 -F53.187173690000002 -asg33 -(lsg34 -I7 -sg36 -I0 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lp484 -(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp485 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lsg33 -(lsg34 -I1 -sg36 -I1 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lsg50 -S'1?' -sba(irdkit.ML.DecTree.QuantTree -QuantTreeNode -(dp486 -g28 -(lsg29 -Nsg30 -I1 -sg31 -(lsg33 -(lsg34 -I0 -sg36 -I1 -sg37 -(lsg38 -(lsg40 -I-666 -sg41 -(lsg50 -S'0' -sbasg50 -g14 -sbasg50 -g10 -sbasg50 -g18 -sbasg50 -g23 -sbasS'_randomSeed' -(I156218 -I774779 -tsS'nPossibleVals' -(lp487 -I0 -aI0 -aI0 -aI0 -aI0 -aI0 -aI0 -aI0 -aI0 -aI0 -aI0 -aI0 -aI0 -aI0 -aI17 -aI8 -aI0 -aI2 -asg39 -g7 -sS'quantBounds' -NsS'_splitFrac' -F0.69999999999999996 -sS'quantizationRequirements' -(lp488 -I0 -aI0 -aI0 -aI0 -aI0 -aI0 -aI0 -aI0 -aI0 -aI0 -asS'countList' -(lp489 -I1 -aI1 -aI1 -aI1 -aI1 -aI1 -aI1 -aI1 -aI1 -aI1 -asS'errList' -(lp490 -F0.15596330275229359 -aF0.16055045871559634 -aF0.1743119266055046 -aF0.17889908256880735 -aF0.1834862385321101 -aF0.19266055045871561 -aF0.20642201834862386 -aF0.21100917431192662 -aF0.21559633027522937 -aF0.22477064220183487 -asbsS'_notes' -p491 -S'General purpose model built from PhysProp data' -p492 -sS'_initialized' -p493 -I1 -sS'_descCalc' -p494 -(irdkit.ML.Descriptors.MoleculeDescriptors -MolecularDescriptorCalculator -p495 -(dp496 -S'simpleList' -p497 -(lp498 -S'BalabanJ' -p499 -aS'BertzCT' -p500 -aS'GordonScantleburyB2' -p501 -aS'HallKierAlpha' -p502 -aS'Kappa3' -p503 -aS'SuperPendenticIndex' -p504 -aS'PEOE_VSA8' -p505 -aS'PEOE_VSA9' -p506 -aS'SMR_VSA7' -p507 -aS'SlogP_VSA1' -p508 -aS'SlogP_VSA3' -p509 -aS'SlogP_VSA6' -p510 -aS'TPSA' -p511 -aS'NumHAcceptors' -p512 -aS'NumHDonors' -p513 -aS'MolLogP' -p514 -asS'descriptorNames' -p515 -(lp516 -g499 -ag500 -ag501 -ag502 -ag503 -ag504 -ag505 -ag506 -ag507 -ag508 -ag509 -ag510 -ag511 -ag512 -ag513 -ag514 -asS'compoundList' -p517 -NsbsS'_dataSet' -p518 -Nsb. diff --git a/rdkit/ML/NaiveBayes/ClassificationModel.py b/rdkit/ML/NaiveBayes/ClassificationModel.py deleted file mode 100644 index c7285f13bbb..00000000000 --- a/rdkit/ML/NaiveBayes/ClassificationModel.py +++ /dev/null @@ -1,279 +0,0 @@ -# $Id$ -# -# Copyright (C) 2004-2008 Greg Landrum and Rational Discovery LLC -# All Rights Reserved -# -""" Defines Naive Baysean classification model - Based on development in: Chapter 6 of "Machine Learning" by Tom Mitchell - -""" -import numpy - -from rdkit.ML.Data import Quantize - - -def _getBinId(val, qBounds): - bid = 0 - for bnd in qBounds: - if (val > bnd): - bid += 1 - return bid - - -# FIX: this class has not been updated to new-style classes -# (RD Issue380) because that would break all of our legacy pickled -# data. Until a solution is found for this breakage, an update is -# impossible. -class NaiveBayesClassifier: - """ - _NaiveBayesClassifier_s can save the following pieces of internal state, accessible via - standard setter/getter functions: - - 1) _Examples_: a list of examples which have been predicted - - 2) _TrainingExamples_: List of training examples - the descriptor value of these examples - are quantized based on info gain using ML/Data/Quantize.py if necessary - - 3) _TestExamples_: the list of examples used to test the model - - 4) _BadExamples_ : list of examples that were incorrectly classified - - 4) _QBoundVals_: Quant bound values for each varaible - a list of lists - - 5) _QBounds_ : Number of bounds for each variable - - """ - - def __init__(self, attrs, nPossibleVals, nQuantBounds, mEstimateVal=-1.0, useSigs=False): - """ Constructor - - """ - self._attrs = attrs - self._mEstimateVal = mEstimateVal - self._useSigs = useSigs - - self._classProbs = {} - - self._examples = [] - self._trainingExamples = [] - self._testExamples = [] - self._badExamples = [] - self._QBoundVals = {} - self._nClasses = nPossibleVals[-1] - self._qBounds = nQuantBounds - self._nPosVals = nPossibleVals - self._needsQuant = 1 - - self._name = "" - self.mprob = -1.0 - - # for the sake a of efficiency lets try to change the conditional probabilities - # to a numpy array instead of a dictionary. The three dimension array is indexed - # on the activity class, the descriptor ID and the descriptor binID - # self._condProbs = {} - # self._condProbs = numpy.zeros((self._nClasses, max(self._attrs)+1, - # max(self._nPosVals)+1), 'd') - self._condProbs = [None] * self._nClasses - for i in range(self._nClasses): - if not (hasattr(self, '_useSigs') and self._useSigs): - nA = max(self._attrs) + 1 - self._condProbs[i] = [None] * nA - for j in range(nA): - nV = self._nPosVals[j] - if self._qBounds[j]: - nV = max(nV, self._qBounds[j] + 1) - self._condProbs[i][j] = [0.0] * nV - else: - self._condProbs[i] = {} - for idx in self._attrs: - self._condProbs[i][idx] = [0.0] * 2 - - def GetName(self): - return self._name - - def SetName(self, name): - self._name = name - - def NameModel(self, varNames): - self.SetName('NaiveBayesClassifier') - - def GetExamples(self): - return self._examples - - def SetExamples(self, examples): - self._examples = examples - - def GetTrainingExamples(self): - return self._trainingExamples - - def SetTrainingExamples(self, examples): - self._trainingExamples = examples - - def GetTestExamples(self): - return self._testExamples - - def SetTestExamples(self, examples): - self._testExamples = examples - - def SetBadExamples(self, examples): - self._badExamples = examples - - def GetBadExamples(self): - return self._badExamples - - def _computeQuantBounds(self): - neg = len(self._trainingExamples) - natr = len(self._attrs) - - # make a list of results and values - allVals = numpy.zeros((neg, natr), 'd') - res = [] # list of y values - i = 0 - for eg in self._trainingExamples: - res.append(eg[-1]) - j = 0 - for ai in self._attrs: - val = eg[ai] - allVals[i, j] = val - j += 1 - i += 1 - - # now loop over each of the columns and compute the bounds - # the number of bounds is determined by the maximum info gain - i = 0 - for ai in self._attrs: - nbnds = self._qBounds[ai] - if nbnds > 0: - mbnds = [] - mgain = -1.0 - - for j in range(1, nbnds + 1): - bnds, igain = Quantize.FindVarMultQuantBounds(allVals[:, i], j, res, self._nClasses) - if (igain > mgain): - mbnds = bnds - mgain = igain - self._QBoundVals[ai] = mbnds - i += 1 - - def trainModel(self): - """ We will assume at this point that the training examples have been set - - We have to estmate the conditional probabilities for each of the (binned) descriptor - component give a outcome (or class). Also the probabilities for each class is estimated - """ - # first estimate the class probabilities - n = len(self._trainingExamples) - for i in range(self._nClasses): - self._classProbs[i] = 0.0 - - # for i in range(self._nClasses): - # self._classProbs[i] = float(self._classProbs[i])/n - - # first find the bounds for each descriptor value if necessary - if not self._useSigs and max(self._qBounds) > 0: - self._computeQuantBounds() - - # now compute the probabilities - ncls = {} - - incr = 1.0 / n - for eg in self._trainingExamples: - cls = eg[-1] - self._classProbs[cls] += incr - ncls[cls] = ncls.get(cls, 0) + 1 - tmp = self._condProbs[cls] - if not self._useSigs: - for ai in self._attrs: - bid = eg[ai] - if self._qBounds[ai] > 0: - bid = _getBinId(bid, self._QBoundVals[ai]) - tmp[ai][bid] += 1.0 - else: - for ai in self._attrs: - if eg[1].GetBit(ai): - tmp[ai][1] += 1.0 - else: - tmp[ai][0] += 1.0 - - # for key in self._condProbs: - for cls in range(self._nClasses): - if cls not in ncls: - continue - # cls = key[0] - tmp = self._condProbs[cls] - for ai in self._attrs: - if not self._useSigs: - nbnds = self._nPosVals[ai] - if (self._qBounds[ai] > 0): - nbnds = self._qBounds[ai] - else: - nbnds = 2 - for bid in range(nbnds): - if self._mEstimateVal <= 0.0: - # this is simple the fraction of of time this descriptor component assume - # this value for the examples that belong a specific class - # self._condProbs[key] = (float(self._condProbs[key]))/ncls[cls] - tmp[ai][bid] /= ncls[cls] - else: - # this a bit more complicated form - more appropriate for unbalanced data - # see "Machine Learning" by Tom Mitchell section 6.9.1.1 - - # this is the probability that this descriptor component can take this specific value - # in the lack of any other information is is simply the inverse of the number of - # possible values 'npossible' - # If we quantized this component then - # npossible = 1 + len(self._QBoundVals[ai]) - # else if we did no qunatize (the descriptor came quantized) - # npossible = nPossibleVals[ai] - # ai = key[1] - pdesc = 0.0 - if self._qBounds[ai] > 0: - pdesc = 1.0 / (1 + len(self._QBoundVals[ai])) - elif (self._nPosVals[ai] > 0): - pdesc = 1.0 / (self._nPosVals[ai]) - else: - raise ValueError('Neither Bounds set nor data pre-quantized for attribute ' + str(ai)) - tmp[ai][bid] += (self._mEstimateVal) * pdesc - tmp[ai][bid] /= (ncls[cls] + self._mEstimateVal) - - def ClassifyExamples(self, examples, appendExamples=0): - preds = [] - for eg in examples: - pred = self.ClassifyExample(eg, appendExamples) - preds.append(int(pred)) - return preds - - def GetClassificationDetails(self): - """ returns the probability of the last prediction """ - return self.mprob - - def ClassifyExample(self, example, appendExamples=0): - """ Classify an example by summing over the conditional probabilities - The most likely class is the one with the largest probability - """ - if appendExamples: - self._examples.append(example) - clsProb = {} - for key, prob in self._classProbs.items(): - clsProb[key] = prob - tmp = self._condProbs[key] - for ai in self._attrs: - if not (hasattr(self, '_useSigs') and self._useSigs): - bid = example[ai] - if self._qBounds[ai] > 0: - bid = _getBinId(bid, self._QBoundVals[ai]) - else: - if example[1].GetBit(ai): - bid = 1 - else: - bid = 0 - clsProb[key] *= tmp[ai][bid] - - mkey = -1 - self.mprob = -1.0 - for key, prob in clsProb.items(): - if (prob > self.mprob): - mkey = key - self.mprob = prob - - return mkey diff --git a/rdkit/ML/NaiveBayes/CrossValidate.py b/rdkit/ML/NaiveBayes/CrossValidate.py deleted file mode 100644 index 4eb268bb08c..00000000000 --- a/rdkit/ML/NaiveBayes/CrossValidate.py +++ /dev/null @@ -1,79 +0,0 @@ -# $Id$ -# -# Copyright (C) 2004-2005 Rational Discovery LLC. -# All Rights Reserved -# -""" handles doing cross validation with naive bayes models -and evaluation of individual models - -""" - -from rdkit.ML.Data import SplitData -from rdkit.ML.NaiveBayes.ClassificationModel import NaiveBayesClassifier - -try: - from rdkit.ML.FeatureSelect import CMIM -except ImportError: - CMIM = None - - -def makeNBClassificationModel(trainExamples, attrs, nPossibleValues, nQuantBounds, - mEstimateVal=-1.0, useSigs=False, ensemble=None, useCMIM=0, **kwargs): - if CMIM is not None and useCMIM > 0 and useSigs and not ensemble: - ensemble = CMIM.SelectFeatures(trainExamples, useCMIM, bvCol=1) - if ensemble: - attrs = ensemble - model = NaiveBayesClassifier(attrs, nPossibleValues, nQuantBounds, mEstimateVal=mEstimateVal, - useSigs=useSigs) - - model.SetTrainingExamples(trainExamples) - model.trainModel() - return model - - -def CrossValidate(NBmodel, testExamples, appendExamples=0): - - nTest = len(testExamples) - assert nTest, 'no test examples: %s' % str(testExamples) - badExamples = [] - nBad = 0 - preds = NBmodel.ClassifyExamples(testExamples, appendExamples) - assert len(preds) == nTest - - for i in range(nTest): - testEg = testExamples[i] - trueRes = testEg[-1] - res = preds[i] - - if (trueRes != res): - badExamples.append(testEg) - nBad += 1 - return float(nBad) / nTest, badExamples - - -def CrossValidationDriver(examples, attrs, nPossibleValues, nQuantBounds, mEstimateVal=0.0, - holdOutFrac=0.3, modelBuilder=makeNBClassificationModel, silent=0, - calcTotalError=0, **kwargs): - nTot = len(examples) - if not kwargs.get('replacementSelection', 0): - testIndices, trainIndices = SplitData.SplitIndices(nTot, holdOutFrac, silent=1, legacy=1, - replacement=0) - else: - testIndices, trainIndices = SplitData.SplitIndices(nTot, holdOutFrac, silent=1, legacy=0, - replacement=1) - - trainExamples = [examples[x] for x in trainIndices] - testExamples = [examples[x] for x in testIndices] - - NBmodel = modelBuilder(trainExamples, attrs, nPossibleValues, nQuantBounds, mEstimateVal, - **kwargs) - - if not calcTotalError: - xValError, _ = CrossValidate(NBmodel, testExamples, appendExamples=1) - else: - xValError, _ = CrossValidate(NBmodel, examples, appendExamples=0) - - if not silent: - print('Validation error was %%%4.2f' % (100 * xValError)) - NBmodel._trainIndices = trainIndices - return NBmodel, xValError diff --git a/rdkit/ML/NaiveBayes/UnitTestNB.py b/rdkit/ML/NaiveBayes/UnitTestNB.py deleted file mode 100644 index b0d660159bd..00000000000 --- a/rdkit/ML/NaiveBayes/UnitTestNB.py +++ /dev/null @@ -1,218 +0,0 @@ -# $Id$ -# -# Copyright (C) 2004-2005 Rational Discovery LLC -# All Rights Reserved -# -import os -import unittest - -from rdkit import RDConfig -from rdkit.DataStructs import ExplicitBitVect -from rdkit.ML.Data import DataUtils -from rdkit.ML.NaiveBayes import CrossValidate -from rdkit.ML.NaiveBayes.ClassificationModel import NaiveBayesClassifier - - -class TestCase(unittest.TestCase): - - def setUp(self): - DataUtils.InitRandomNumbers((25, 25)) - - def test1NaiveBayes(self): - fName = os.path.join(RDConfig.RDCodeDir, 'ML', 'NaiveBayes', 'test_data', 'stddata.csv') - data = DataUtils.TextFileToData(fName) - - examples = data.GetNamedData() - - nvars = data.GetNVars() - attrs = list(range(1, nvars + 1)) - npvals = [0] + [3] * nvars + [2] - qBounds = [0] + [2] * nvars + [0] - mod, err = CrossValidate.CrossValidationDriver(examples, attrs, npvals, qBounds, silent=True) - self.assertAlmostEqual(mod._classProbs[0], 0.5000, 4) - self.assertAlmostEqual(mod._classProbs[1], 0.5000, 4) - self.assertAlmostEqual(mod._QBoundVals[1][0], -0.0360, 4) - self.assertAlmostEqual(mod._QBoundVals[1][1], 0.114) - self.assertAlmostEqual(mod._QBoundVals[2][0], -0.7022, 4) - self.assertAlmostEqual(mod._QBoundVals[2][1], -0.16635, 4) - self.assertAlmostEqual(mod._QBoundVals[3][0], -0.3659, 4) - self.assertAlmostEqual(mod._QBoundVals[3][1], 0.4305, 4) - self.assertAlmostEqual(err, 0.2121, 4) - - mod, err = CrossValidate.CrossValidationDriver(examples, attrs, npvals, qBounds, silent=True, - calcTotalError=True) - self.assertAlmostEqual(mod._classProbs[0], 0.515151, 4) - self.assertAlmostEqual(mod._classProbs[1], 0.484848, 4) - self.assertAlmostEqual(mod._QBoundVals[1][0], -0.40315, 4) - self.assertAlmostEqual(mod._QBoundVals[1][1], 0.114) - self.assertAlmostEqual(mod._QBoundVals[2][0], -0.62185, 4) - self.assertAlmostEqual(mod._QBoundVals[2][1], -0.19965, 4) - self.assertAlmostEqual(mod._QBoundVals[3][0], 0.4305, 4) - self.assertAlmostEqual(mod._QBoundVals[3][1], 0.80305, 4) - self.assertAlmostEqual(err, 0.14563, 4) - - mod, err = CrossValidate.CrossValidationDriver(examples, attrs, npvals, qBounds, silent=True, - replacementSelection=True) - self.assertAlmostEqual(mod._classProbs[0], 0.5131578, 4) - self.assertAlmostEqual(mod._classProbs[1], 0.4868421, 4) - self.assertAlmostEqual(mod._QBoundVals[1][0], -0.036, 4) - self.assertAlmostEqual(mod._QBoundVals[1][1], 0.93465, 4) - self.assertAlmostEqual(mod._QBoundVals[2][0], -0.6696, 4) - self.assertAlmostEqual(mod._QBoundVals[2][1], -0.19965, 4) - self.assertAlmostEqual(mod._QBoundVals[3][0], -1.06785, 4) - self.assertAlmostEqual(mod._QBoundVals[3][1], 0.4305, 4) - self.assertAlmostEqual(err, 0.3, 4) - - def test2NaiveBayes(self): - fName = os.path.join(RDConfig.RDCodeDir, 'ML', 'NaiveBayes', 'test_data', 'stddata.csv') - data = DataUtils.TextFileToData(fName) - examples = data.GetNamedData() - - nvars = data.GetNVars() - attrs = list(range(1, nvars + 1)) - npvals = [0] + [3] * nvars + [2] - qBounds = [0] + [2] * nvars + [0] - mod, err = CrossValidate.CrossValidationDriver(examples, attrs, npvals, qBounds, - mEstimateVal=20.0, silent=True) - self.assertTrue(isinstance(mod, NaiveBayesClassifier)) - self.assertAlmostEqual(err, 0.1818, 4) - - self.assertEqual(mod.GetName(), '') - mod.SetName('modelName') - self.assertEqual(mod.GetName(), 'modelName') - mod.NameModel(None) - self.assertEqual(mod.GetName(), 'NaiveBayesClassifier') - - self.assertGreater(len(mod.GetExamples()), 0) - self.assertGreater(len(mod.GetTrainingExamples()), 0) - self.assertEqual(sorted(mod.GetTrainingExamples() + mod.GetExamples()), sorted(examples)) - - def test3(self): - examples = [ - ['a', 1, 0, 1, 0, 1], - ['b', 1, 0, 0, 0, 1], - ['c', 1, 0, 1, 0, 0], - ['d', 0, 1, 1, 0, 0], - ['e', 0, 1, 1, 1, 0], - ] - - nvars = len(examples[0]) - 2 - attrs = list(range(1, nvars + 1)) - npvals = [0] + [2] * nvars + [2] - qBounds = [0] + [0] * nvars + [0] - mdl = CrossValidate.makeNBClassificationModel(examples, attrs, npvals, qBounds) - nWrong = 0 - for eg in examples: - p = mdl.ClassifyExample(eg) - if p != eg[-1]: - nWrong += 1 - self.assertEqual(nWrong, 1) - - bitEx = [] - for eg in examples: - newEg = [eg[0], None, eg[-1]] - bv = ExplicitBitVect(nvars) - for i in range(nvars): - if eg[i + 1]: - bv.SetBit(i) - newEg[1] = bv - bitEx.append(newEg) - - attrs = list(range(nvars)) - mdl2 = CrossValidate.makeNBClassificationModel(bitEx, attrs, npvals, qBounds, useSigs=True) - nWrong = 0 - for eg in bitEx: - p = mdl2.ClassifyExample(eg) - if p != eg[-1]: - nWrong += 1 - self.assertEqual(nWrong, 1) - - # now compare: - for i in range(len(bitEx)): - eg = examples[i] - p1 = mdl.ClassifyExample(eg) - bitEg = bitEx[i] - p2 = mdl2.ClassifyExample(bitEg) - self.assertEqual(p1, p2) - v1 = mdl.GetClassificationDetails() - v2 = mdl.GetClassificationDetails() - self.assertAlmostEqual(v1, v2, 4) - - def test4(self): - examples = [ - ['a', 1, 0, 1, 0, 1], - ['b', 1, 0, 0, 0, 1], - ['c', 1, 0, 1, 0, 0], - ['d', 0, 1, 1, 0, 0], - ['e', 0, 1, 1, 1, 0], - ] - - nvars = len(examples[0]) - 2 - origNVars = nvars - nvars = 10 - npvals = [0] + [2] * nvars + [2] - qBounds = [0] + [0] * nvars + [0] - - bitEx = [] - for eg in examples: - newEg = [eg[0], None, eg[-1]] - bv = ExplicitBitVect(nvars) - for i in range(origNVars): - if eg[i + 1]: - bv.SetBit(i) - - # this bit will yield perfect accuracy if - # the attrs argument isn't being used properly: - if eg[-1]: - bv.SetBit(origNVars) - newEg[1] = bv - bitEx.append(newEg) - - attrs = list(range(origNVars)) - mdl2 = CrossValidate.makeNBClassificationModel(bitEx, attrs, npvals, qBounds, useSigs=True) - nWrong = 0 - for eg in bitEx: - p = mdl2.ClassifyExample(eg) - if p != eg[-1]: - nWrong += 1 - self.assertEqual(nWrong, 1) - - def _test5(self): # disabled because CMIM was removed # pragma: nocover - examples = [ - ['a', 1, 0, 1, 0, 1, 1, 0, 1], - ['b', 1, 0, 0, 0, 1, 0, 0, 1], - ['c', 1, 0, 1, 0, 1, 1, 0, 0], - ['d', 0, 1, 1, 0, 1, 0, 0, 0], - ['e', 0, 1, 1, 1, 0, 1, 0, 0], - ] - - nvars = len(examples[0]) - 2 - npvals = [0] + [2] * nvars + [2] - qBounds = [0] + [0] * nvars + [0] - - bitEx = [] - for eg in examples: - newEg = [eg[0], None, eg[-1]] - bv = ExplicitBitVect(nvars) - for i in range(nvars): - if eg[i + 1]: - bv.SetBit(i) - - # this bit will yield perfect accuracy if - # the attrs argument isn't being used properly: - newEg[1] = bv - bitEx.append(newEg) - - attrs = list(range(nvars)) - mdl2 = CrossValidate.makeNBClassificationModel(bitEx, attrs, npvals, qBounds, useSigs=True, - useCMIM=2) - nWrong = 0 - for eg in bitEx: - p = mdl2.ClassifyExample(eg) - if p != eg[-1]: - nWrong += 1 - self.assertEqual(nWrong, 1) - - -if __name__ == '__main__': # pragma: nocover - unittest.main() diff --git a/rdkit/ML/NaiveBayes/__init__.py b/rdkit/ML/NaiveBayes/__init__.py deleted file mode 100644 index 970f7608d13..00000000000 --- a/rdkit/ML/NaiveBayes/__init__.py +++ /dev/null @@ -1,12 +0,0 @@ -# copyright 2003, Rational Discovery LLC -""" - -An implementation of the Naive Bayes Classifier - -""" -from warnings import warn - -warn('This module is deprecated and will be removed in the 2024.03 release', DeprecationWarning, - stacklevel=2) - -from rdkit import rdBase diff --git a/rdkit/ML/NaiveBayes/test_data/stddata.csv b/rdkit/ML/NaiveBayes/test_data/stddata.csv deleted file mode 100644 index 928ff4ba016..00000000000 --- a/rdkit/ML/NaiveBayes/test_data/stddata.csv +++ /dev/null @@ -1,104 +0,0 @@ -compound,d1,d2,d3,isferro -FeCo,1.0913,-0.7543,0.7261,1 -Ni,2.0524,-0.8602,-1.0678,1 -Fe,0.7908,-0.7166,0.7261,1 -AlFe3,0.1435,-0.6580,0.7261,1 -Mn,0.3466,-0.7166,1.4655,1 -FeNi3,1.6872,-0.8124,0.7261,1 -Fe3Pd,0.4447,-0.3887,0.7261,1 -HfV2,-2.0302,2.6722,-0.6417,0 -Pd,0.8050,-0.2631,-3.1515,0 -FePt3,0.6741,-0.2949,0.7261,1 -CrPt3,0.4907,-0.2932,0.8800,1 -Fe3Pt,0.6771,-0.5359,0.7261,1 -Cr3Rh,0.0139,-0.5688,0.8800,1 -Pt,0.7237,-0.2094,-2.3739,0 -Cr3Ir,-0.0056,-0.5547,0.8800,0 -V,-0.8564,-0.4110,-0.6417,0 -Cr3Ru,-0.0975,-0.5624,0.8800,0 -Co2Nb,-1.2086,1.4890,-0.1279,0 -CoTi2,-1.5913,1.4104,-0.1279,0 -Cr3Os,-0.1060,-0.5560,0.8800,0 -CoTi,-0.2221,-0.4641,-0.1279,0 -Nb,-1.3016,0.2463,-0.6039,0 -Cr3Pt,0.0584,-0.5275,0.8800,0 -Cr2Nb,-1.6990,1.8134,0.8800,0 -Rh,0.6818,-0.4194,-1.6761,0 -IrTi3,-1.0080,-0.1075,-1.3395,0 -Ta,-1.3148,0.2719,-1.4339,0 -CoZr,-0.6679,-0.0163,-0.1279,0 -Co2Y,-1.5869,2.3278,-0.1279,0 -Cr2Ta,-1.7041,1.8343,0.8800,0 -Ir2Sc,-1.6505,2.6079,-1.3395,0 -CoHf,-0.6078,-0.0875,-0.1279,0 -Ir2Zr,-1.6032,2.6333,-1.3395,0 -Ir,0.5753,-0.3508,-1.3395,0 -IrNb3,-0.9003,0.0813,-0.6039,0 -IrV3,-0.4667,-0.4190,-0.6417,0 -IrZr,-0.8941,0.2926,-1.3395,0 -CoV3,-0.2962,-0.5676,-0.1279,0 -HfRh3,-0.0420,-0.2256,-1.6761,0 -HfIr3,-0.0730,-0.1985,-1.3395,0 -HfTc,-1.1109,0.1702,0.1349,0 -HfPt,-0.5814,0.0710,-2.0440,0 -Ir3Ti,0.1005,-0.3422,-1.3395,0 -Cr,-0.1252,-0.6972,0.8800,0 -HfNi2,-1.2323,1.7308,-1.0678,0 -Mo,-0.7469,-0.1203,0.0564,0 -Cr2Zr,-1.8679,2.3340,0.8800,0 -Ir2Y,-1.7280,2.9955,-1.3395,0 -Cr2Hf,-1.8394,2.1827,0.8800,0 -Ir3V,0.2703,-0.3990,-0.6417,0 -HfMo2,-1.9802,3.0378,0.0564,0 -Cr2Ti,-1.7633,1.8210,0.8800,0 -CoSc,-0.7373,-0.1319,-0.1279,0 -W,-0.7660,-0.0969,-0.8055,0 -HfW2,-1.9940,3.1387,-0.8055,0 -IrY,-1.1596,0.5099,-1.3395,0 -Ir3Nb,0.0849,-0.2578,-0.6039,0 -Ir3Ta,0.1003,-0.2702,-1.3395,0 -HfRu,-0.8843,0.0587,-1.0679,0 -IrSc,-0.8507,0.0126,-1.3395,0 -HfOs,-0.9085,0.0931,-0.7472,0 -CoPt,0.9715,-0.4759,-0.1279,1 -MnNi,0.9938,-0.7072,1.4655,1 -MnPd,0.2448,-0.2438,1.4655,1 -MnPt,0.2773,-0.2687,1.4655,1 -NiPt,1.2429,-0.5150,-1.0678,1 -CoPt3,0.8790,-0.3664,-0.1279,1 -Cr3Pt,-0.0300,-0.4615,0.8800,1 -CrPt3,0.4907,-0.2932,0.8800,1 -FePt,0.5727,-0.3491,0.7261,1 -Mn3Pt,0.1275,-0.3629,1.4655,1 -MnNi3,1.4253,-0.7517,1.4655,1 -MnPt3,0.5298,-0.2578,1.4655,1 -Cr4Pt,-0.0222,-0.5275,0.8800,1 -FeRh,0.5797,-0.4767,0.7261,1 -Co3Pt,1.1736,-0.6356,-0.1279,1 -Co4Os,1.2991,-0.7863,-0.1279,1 -Co4Ru,1.2651,-0.7713,-0.1279,1 -CoRh,1.0610,-0.6324,-0.1279,1 -CrPt,0.0164,-0.2008,0.8800,1 -CrPt4,0.5553,-0.2879,0.8800,1 -Fe13Pd7,0.6068,-0.4447,0.7261,1 -Fe13Pt7,0.6229,-0.4548,0.7261,1 -Fe3Pt17,0.6900,-0.2578,0.7261,1 -Fe3Rh7,0.6425,-0.4670,0.7261,1 -FePd26,0.7860,-0.2679,0.7261,1 -Mn2Pt3,0.3673,-0.2560,1.4655,1 -MnPd3,0.8714,-0.4749,1.4655,1 -NiPd,1.1880,-0.4865,-1.0678,1 -NiRh,1.2259,-0.6094,-1.0678,0 -Fe2Hf,-0.3396,-0.3055,0.7261,1 -Fe2Zr,-0.4026,-0.2450,0.7261,1 -Co3Ir,1.1770,-0.6894,-0.1279,1 -Co3Ir7,1.1453,-0.6742,-0.1279,0 -Co3Rh,1.1770,-0.6894,-0.1279,1 -Co4Rh,1.2956,-0.7443,-0.1279,1 -CoIr,1.0196,-0.6113,-0.1279,0 -CoOs,0.8109,-0.6120,-0.1279,0 -CoRu,0.8978,-0.6584,-0.1279,0 -Fe16Rh,0.7489,-0.6808,0.7261,1 -Fe32Pt,0.7695,-0.6914,0.7261,1 -Fe9Pt,0.7160,-0.6296,0.7261,1 -Fe9Rh,0.7272,-0.6594,0.7261,1 diff --git a/rdkit/ML/Neural/ActFuncs.py b/rdkit/ML/Neural/ActFuncs.py deleted file mode 100644 index 9c4d0b74fd5..00000000000 --- a/rdkit/ML/Neural/ActFuncs.py +++ /dev/null @@ -1,65 +0,0 @@ -# -# Copyright (C) 2000-2008 greg Landrum -# -""" Activation functions for neural network nodes - -Activation functions should implement the following API: - - - _Eval(x)_: returns the value of the function at a given point - - - _Deriv(x)_: returns the derivative of the function at a given point - -The current Backprop implementation also requires: - - - _DerivFromVal(val)_: returns the derivative of the function when its - value is val - -In all cases _x_ is a float as is the value returned. - -""" -import math - - -class ActFunc(object): - """ "virtual base class" for activation functions - - """ - - def __call__(self, x): - return self.Eval(x) - - -class Sigmoid(ActFunc): - """ the standard sigmoidal function """ - - def Eval(self, x): - return 1. / (1. + math.exp(-self.beta * x)) - - def Deriv(self, x): - val = self.Eval(x) - return self.beta * val * (1. - val) - - def DerivFromVal(self, val): - return self.beta * val * (1. - val) - - def __init__(self, beta=1.): - self.beta = beta - - -class TanH(ActFunc): - """ the standard hyperbolic tangent function """ - - def Eval(self, x): - v1 = math.exp(self.beta * x) - v2 = math.exp(-self.beta * x) - return (v1 - v2) / (v1 + v2) - - def Deriv(self, x): - val = self.Eval(x) - return self.beta * (1 - val * val) - - def DerivFromVal(self, val): - return self.beta * (1 - val * val) - - def __init__(self, beta=1.): - self.beta = beta diff --git a/rdkit/ML/Neural/CrossValidate.py b/rdkit/ML/Neural/CrossValidate.py deleted file mode 100644 index da79f29716d..00000000000 --- a/rdkit/ML/Neural/CrossValidate.py +++ /dev/null @@ -1,127 +0,0 @@ -# -# Copyright (C) 2000 greg Landrum -# -""" handles doing cross validation with neural nets - -This is, perhaps, a little misleading. For the purposes of this module, -cross validation == evaluating the accuracy of a net. - -""" - -import math - -from rdkit.ML.Data import SplitData -from rdkit.ML.Neural import Network, Trainers - - -def CrossValidate(net, testExamples, tolerance, appendExamples=0): - """ Determines the classification error for the testExamples - **Arguments** - - - tree: a decision tree (or anything supporting a _ClassifyExample()_ method) - - - testExamples: a list of examples to be used for testing - - - appendExamples: a toggle which is ignored, it's just here to maintain - the same API as the decision tree code. - - **Returns** - - a 2-tuple consisting of: - - 1) the percent error of the net - - 2) a list of misclassified examples - - **Note** - At the moment, this is specific to nets with only one output - """ - nTest = len(testExamples) - nBad = 0 - badExamples = [] - for i in range(nTest): - testEx = testExamples[i] - trueRes = testExamples[i][-1] - res = net.ClassifyExample(testEx) - if math.fabs(trueRes - res) > tolerance: - badExamples.append(testEx) - nBad = nBad + 1 - - return float(nBad) / nTest, badExamples - - -def CrossValidationDriver(examples, attrs=[], nPossibleVals=[], holdOutFrac=.3, silent=0, - tolerance=0.3, calcTotalError=0, hiddenSizes=None, **kwargs): - """ - **Arguments** - - - examples: the full set of examples - - - attrs: a list of attributes to consider in the tree building - *This argument is ignored* - - - nPossibleVals: a list of the number of possible values each variable can adopt - *This argument is ignored* - - - holdOutFrac: the fraction of the data which should be reserved for the hold-out set - (used to calculate the error) - - - silent: a toggle used to control how much visual noise this makes as it goes. - - - tolerance: the tolerance for convergence of the net - - - calcTotalError: if this is true the entire data set is used to calculate - accuracy of the net - - - hiddenSizes: a list containing the size(s) of the hidden layers in the network. - if _hiddenSizes_ is None, one hidden layer containing the same number of nodes - as the input layer will be used - - **Returns** - - a 2-tuple containing: - - 1) the net - - 2) the cross-validation error of the net - - **Note** - At the moment, this is specific to nets with only one output - - """ - nTot = len(examples) - if not kwargs.get('replacementSelection', 0): - testIndices, trainIndices = SplitData.SplitIndices(nTot, holdOutFrac, silent=1, legacy=1, - replacement=0) - else: - testIndices, trainIndices = SplitData.SplitIndices(nTot, holdOutFrac, silent=1, legacy=0, - replacement=1) - trainExamples = [examples[x] for x in trainIndices] - testExamples = [examples[x] for x in testIndices] - - nTrain = len(trainExamples) - if not silent: - print('Training with %d examples' % (nTrain)) - - nInput = len(examples[0]) - 1 - nOutput = 1 - if hiddenSizes is None: - nHidden = nInput - netSize = [nInput, nHidden, nOutput] - else: - netSize = [nInput] + hiddenSizes + [nOutput] - net = Network.Network(netSize) - t = Trainers.BackProp() - t.TrainOnLine(trainExamples, net, errTol=tolerance, useAvgErr=0, silent=silent) - - nTest = len(testExamples) - if not silent: - print('Testing with %d examples' % nTest) - if not calcTotalError: - xValError, _ = CrossValidate(net, testExamples, tolerance) - else: - xValError, _ = CrossValidate(net, examples, tolerance) - if not silent: - print('Validation error was %%%4.2f' % (100 * xValError)) - net._trainIndices = trainIndices - return net, xValError diff --git a/rdkit/ML/Neural/NetNode.py b/rdkit/ML/Neural/NetNode.py deleted file mode 100644 index 6280c811f46..00000000000 --- a/rdkit/ML/Neural/NetNode.py +++ /dev/null @@ -1,149 +0,0 @@ -# -# Copyright (C) 2000-2008 greg Landrum -# -""" Contains the class _NetNode_ which is used to represent nodes in neural nets - -**Network Architecture:** - - A tacit assumption in all of this stuff is that we're dealing with - feedforward networks. - - The network itself is stored as a list of _NetNode_ objects. The list - is ordered in the sense that nodes in earlier/later layers than a - given node are guaranteed to come before/after that node in the list. - This way we can easily generate the values of each node by moving - sequentially through the list, we're guaranteed that every input for a - node has already been filled in. - - Each node stores a list (_inputNodes_) of indices of its inputs in the - main node list. - -""" -import numpy - -from . import ActFuncs - - -# FIX: this class has not been updated to new-style classes -# (RD Issue380) because that would break all of our legacy pickled -# data. Until a solution is found for this breakage, an update is -# impossible. -class NetNode: - """ a node in a neural network - - """ - - def Eval(self, valVect): - """Given a set of inputs (valVect), returns the output of this node - - **Arguments** - - - valVect: a list of inputs - - **Returns** - - the result of running the values in valVect through this node - - """ - if self.inputNodes and len(self.inputNodes) != 0: - # grab our list of weighted inputs - inputs = numpy.take(valVect, self.inputNodes) - # weight them - inputs = self.weights * inputs - # run that through the activation function - val = self.actFunc(sum(inputs)) - else: - val = 1 - # put our value in the list and return it (just in case) - valVect[self.nodeIndex] = val - return val - - def SetInputs(self, inputNodes): - """ Sets the input list - - **Arguments** - - - inputNodes: a list of _NetNode_s which are to be used as inputs - - **Note** - - If this _NetNode_ already has weights set and _inputNodes_ is a different length, - this will bomb out with an assertion. - - """ - if self.weights is not None: - assert len(self.weights) == len(inputNodes), \ - 'lengths of weights and nodes do not match' - self.inputNodes = inputNodes[:] - - def GetInputs(self): - """ returns the input list - - """ - return self.inputNodes - - def SetWeights(self, weights): - """ Sets the weight list - - **Arguments** - - - weights: a list of values which are to be used as weights - - **Note** - - If this _NetNode_ already has _inputNodes_ and _weights_ is a different length, - this will bomb out with an assertion. - - """ - if self.inputNodes: - assert len(weights) == len(self.inputNodes),\ - 'lengths of weights and nodes do not match' - self.weights = numpy.array(weights) - - def GetWeights(self): - """ returns the weight list - - """ - return self.weights - - def __init__(self, nodeIndex, nodeList, inputNodes=None, weights=None, actFunc=ActFuncs.Sigmoid, - actFuncParms=()): - """ Constructor - - **Arguments** - - - nodeIndex: the integer index of this node in _nodeList_ - - - nodeList: the list of other _NetNodes_ already in the network - - - inputNodes: a list of this node's inputs - - - weights: a list of this node's weights - - - actFunc: the activation function to be used here. Must support the API - of _ActFuncs.ActFunc_. - - - actFuncParms: a tuple of extra arguments to be passed to the activation function - constructor. - - **Note** - There should be only one copy of _inputNodes_, every _NetNode_ just has a pointer - to it so that changes made at one node propagate automatically to the others. - - """ - if inputNodes and weights: - assert (len(weights) == len(inputNodes)) - if weights: - self.weights = numpy.array(weights) - else: - self.weights = None - if inputNodes: - self.inputNodes = inputNodes[:] - else: - self.inputNodes = None - - self.nodeIndex = nodeIndex - # there's only one of these, everybody has a pointer to it. - self.nodeList = nodeList - - self.actFunc = actFunc(*actFuncParms) diff --git a/rdkit/ML/Neural/Network.py b/rdkit/ML/Neural/Network.py deleted file mode 100755 index 98d5c340ef7..00000000000 --- a/rdkit/ML/Neural/Network.py +++ /dev/null @@ -1,243 +0,0 @@ -# -# Copyright (C) 2000-2008 greg Landrum -# -""" Contains the class _Network_ which is used to represent neural nets - -**Network Architecture:** - - A tacit assumption in all of this stuff is that we're dealing with - feedforward networks. - - The network itself is stored as a list of _NetNode_ objects. The list - is ordered in the sense that nodes in earlier/later layers than a - given node are guaranteed to come before/after that node in the list. - This way we can easily generate the values of each node by moving - sequentially through the list, we're guaranteed that every input for a - node has already been filled in. - - Each node stores a list (_inputNodes_) of indices of its inputs in the - main node list. - -""" - -import random - -import numpy - -from rdkit.ML.Neural import ActFuncs, NetNode - - -# FIX: this class has not been updated to new-style classes -# (RD Issue380) because that would break all of our legacy pickled -# data. Until a solution is found for this breakage, an update is -# impossible. -class Network: - """ a neural network - - """ - - def ConstructRandomWeights(self, minWeight=-1, maxWeight=1): - """initialize all the weights in the network to random numbers - - **Arguments** - - - minWeight: the minimum value a weight can take - - - maxWeight: the maximum value a weight can take - - """ - for node in self.nodeList: - inputs = node.GetInputs() - if inputs: - weights = [random.uniform(minWeight, maxWeight) for _ in range(len(inputs))] - node.SetWeights(weights) - - def FullyConnectNodes(self): - """ Fully connects each layer in the network to the one above it - - - **Note** - this sets the connections, but does not assign weights - - """ - nodeList = list(range(self.numInputNodes)) - nConnections = 0 - for layer in range(self.numHiddenLayers): - for i in self.layerIndices[layer + 1]: - self.nodeList[i].SetInputs(nodeList) - nConnections = nConnections + len(nodeList) - nodeList = self.layerIndices[layer + 1] - - for i in self.layerIndices[-1]: - self.nodeList[i].SetInputs(nodeList) - nConnections = nConnections + len(nodeList) - self.nConnections = nConnections - - def ConstructNodes(self, nodeCounts, actFunc, actFuncParms): - """ build an unconnected network and set node counts - - **Arguments** - - - nodeCounts: a list containing the number of nodes to be in each layer. - the ordering is: - (nInput,nHidden1,nHidden2, ... , nHiddenN, nOutput) - - """ - self.nodeCounts = nodeCounts - self.numInputNodes = nodeCounts[0] - self.numOutputNodes = nodeCounts[-1] - self.numHiddenLayers = len(nodeCounts) - 2 - self.numInHidden = [None] * self.numHiddenLayers - for i in range(self.numHiddenLayers): - self.numInHidden[i] = nodeCounts[i + 1] - - numNodes = sum(self.nodeCounts) - self.nodeList = [None] * (numNodes) - for i in range(numNodes): - self.nodeList[i] = NetNode.NetNode(i, self.nodeList, actFunc=actFunc, - actFuncParms=actFuncParms) - - self.layerIndices = [None] * len(nodeCounts) - start = 0 - for i in range(len(nodeCounts)): - end = start + nodeCounts[i] - self.layerIndices[i] = list(range(start, end)) - start = end - - def GetInputNodeList(self): - """ returns a list of input node indices - """ - return self.layerIndices[0] - - def GetOutputNodeList(self): - """ returns a list of output node indices - """ - return self.layerIndices[-1] - - def GetHiddenLayerNodeList(self, which): - """ returns a list of hidden nodes in the specified layer - """ - return self.layerIndices[which + 1] - - def GetNumNodes(self): - """ returns the total number of nodes - """ - return sum(self.nodeCounts) - - def GetNumHidden(self): - """ returns the number of hidden layers - """ - return self.numHiddenLayers - - def GetNode(self, which): - """ returns a particular node - """ - return self.nodeList[which] - - def GetAllNodes(self): - """ returns a list of all nodes - """ - return self.nodeList - - def ClassifyExample(self, example, appendExamples=0): - """ classifies a given example and returns the results of the output layer. - - **Arguments** - - - example: the example to be classified - - **NOTE:** - - if the output layer is only one element long, - a scalar (not a list) will be returned. This is why a lot of the other - network code claims to only support single valued outputs. - - """ - if len(example) > self.numInputNodes: - if len(example) - self.numInputNodes > self.numOutputNodes: - example = example[1:-self.numOutputNodes] - else: - example = example[:-self.numOutputNodes] - assert len(example) == self.numInputNodes - totNumNodes = sum(self.nodeCounts) - results = numpy.zeros(totNumNodes, numpy.float64) - for i in range(self.numInputNodes): - results[i] = example[i] - for i in range(self.numInputNodes, totNumNodes): - self.nodeList[i].Eval(results) - self.lastResults = results[:] - if self.numOutputNodes == 1: - return results[-1] - else: - return results - - def GetLastOutputs(self): - """ returns the complete list of output layer values from the last time this node - classified anything""" - return self.lastResults - - def __str__(self): - """ provides a string representation of the network """ - outStr = 'Network:\n' - for i in range(len(self.nodeList)): - outStr = outStr + '\tnode(% 3d):\n' % i - outStr = outStr + '\t\tinputs: %s\n' % (str(self.nodeList[i].GetInputs())) - outStr = outStr + '\t\tweights: %s\n' % (str(self.nodeList[i].GetWeights())) - - outStr = outStr + 'Total Number of Connections: % 4d' % self.nConnections - return outStr - - def __init__(self, nodeCounts, nodeConnections=None, actFunc=ActFuncs.Sigmoid, actFuncParms=(), - weightBounds=1): - """ Constructor - - This constructs and initializes the network based upon the specified - node counts. - - A fully connected network with random weights is constructed. - - **Arguments** - - - nodeCounts: a list containing the number of nodes to be in each layer. - the ordering is: - (nInput,nHidden1,nHidden2, ... , nHiddenN, nOutput) - - - nodeConnections: I don't know why this is here, but it's optional. ;-) - - - actFunc: the activation function to be used here. Must support the API - of _ActFuncs.ActFunc_. - - - actFuncParms: a tuple of extra arguments to be passed to the activation function - constructor. - - - weightBounds: a float which provides the boundary on the random initial weights - - """ - self.ConstructNodes(nodeCounts, actFunc, actFuncParms) - self.FullyConnectNodes() - self.ConstructRandomWeights(minWeight=-weightBounds, maxWeight=weightBounds) - self.lastResults = [] - - -if __name__ == '__main__': # pragma: nocover - - print('[2,2,2]') - net = Network([2, 2, 2]) - print(net) - - print('[2,4,1]') - net = Network([2, 4, 1]) - print(net) - - print('[2,2]') - net = Network([2, 2]) - print(net) - inp = [1, 0] - res = net.ClassifyExample(inp) - print(inp, '->', res) - inp = [0, 1] - res = net.ClassifyExample(inp) - print(inp, '->', res) - inp = [.5, .5] - res = net.ClassifyExample(inp) - print(inp, '->', res) diff --git a/rdkit/ML/Neural/Trainers.py b/rdkit/ML/Neural/Trainers.py deleted file mode 100755 index 7ee7218c703..00000000000 --- a/rdkit/ML/Neural/Trainers.py +++ /dev/null @@ -1,270 +0,0 @@ -# -# Copyright (C) 2000-2008 greg Landrum -# -""" Training algorithms for feed-forward neural nets - - Unless noted otherwise, algorithms and notation are taken from: - "Artificial Neural Networks: Theory and Applications", - Dan W. Patterson, Prentice Hall, 1996 - -""" - -import numpy - - -class Trainer(object): - """ "virtual base class" for network trainers - - """ - pass - - -class BackProp(Trainer): - """implement back propagation (algorithm on pp 153-154 of Patterson) - - I don't *think* that I've made any assumptions about the connectivity of - the net (i.e. full connectivity between layers is not required). - - **NOTE:** this code is currently making the assumption that the activation - functions on the nodes in the network are capable of calculating their - derivatives using only their values (i.e. a DerivFromVal method should - exist). This shouldn't be too hard to change. - - """ - - def StepUpdate(self, example, net, resVect=None): - """ does a BackProp step based upon the example - - **Arguments** - - - example: a 2-tuple: - 1) a list of variable values values - 2) a list of result values (targets) - - - net: a _Network_ (or something supporting the same API) - - - resVect: if this is nonzero, then the network is not required to - classify the _example_ - - **Returns** - - the backprop error from _network_ **before the update** - - **Note** - - In case it wasn't blindingly obvious, the weights in _network_ are modified - in the course of taking a backprop step. - - """ - totNumNodes = net.GetNumNodes() - if self.oldDeltaW is None: - self.oldDeltaW = numpy.zeros(totNumNodes, numpy.float64) - outputNodeList = net.GetOutputNodeList() - nOutput = len(outputNodeList) - targetVect = numpy.array(example[-nOutput:], numpy.float64) - trainVect = example[:-nOutput] - if resVect is None: - # classify the example - net.ClassifyExample(trainVect) - resVect = net.GetLastOutputs() - outputs = numpy.take(resVect, outputNodeList) - errVect = targetVect - outputs - - delta = numpy.zeros(totNumNodes, numpy.float64) - # start with the output layer - for i in range(len(outputNodeList)): - idx = outputNodeList[i] - node = net.GetNode(idx) - # the deltas here are easy - delta[idx] = errVect[i] * node.actFunc.DerivFromVal(resVect[idx]) - # use these results to start working on the deltas of the preceding layer - inputs = node.GetInputs() - weights = delta[idx] * node.GetWeights() - for j in range(len(inputs)): - idx2 = inputs[j] - delta[idx2] = delta[idx2] + weights[j] - - # now propagate the deltas backwards - for layer in range(net.GetNumHidden() - 1, -1, -1): - nodesInLayer = net.GetHiddenLayerNodeList(layer) - for idx in nodesInLayer: - node = net.GetNode(idx) - # start by finishing off the error term for this guy - delta[idx] = delta[idx] * node.actFunc.DerivFromVal(resVect[idx]) - - # and then propagate our errors to the preceding layer - if layer != 0: - inputs = node.GetInputs() - weights = delta[idx] * node.GetWeights() - for i in range(len(inputs)): - idx2 = inputs[i] - delta[idx2] = delta[idx2] + weights[i] - - # okey dokey... we've now got the deltas for each node, use those - # to update the weights (whew!) - nHidden = net.GetNumHidden() - for layer in range(0, nHidden + 1): - if layer == nHidden: - idxList = net.GetOutputNodeList() - else: - idxList = net.GetHiddenLayerNodeList(layer) - for idx in idxList: - node = net.GetNode(idx) - dW = self.speed * delta[idx] * numpy.take(resVect, node.GetInputs()) - newWeights = node.GetWeights() + dW - node.SetWeights(newWeights) - - # return the RMS error from the OLD network - return numpy.sqrt(errVect * errVect)[0] - - def TrainOnLine(self, examples, net, maxIts=5000, errTol=0.1, useAvgErr=1, silent=0): - """ carries out online training of a neural net - - The definition of online training is that the network is updated after - each example is presented. - - **Arguments** - - - examples: a list of 2-tuple: - 1) a list of variable values values - 2) a list of result values (targets) - - - net: a _Network_ (or something supporting the same API) - - - maxIts: the maximum number of *training epochs* (see below for definition) to be - run - - - errTol: the tolerance for convergence - - - useAvgErr: if this toggle is nonzero, then the error at each step will be - divided by the number of training examples for the purposes of checking - convergence. - - - silent: controls the amount of visual noise produced as this runs. - - - **Note** - - a *training epoch* is one complete pass through all the training examples - - """ - nExamples = len(examples) - converged = 0 - cycle = 0 - - while (not converged) and (cycle < maxIts): - maxErr = 0 - newErr = 0 - # print('bp: ',cycle) - for example in examples: - localErr = self.StepUpdate(example, net) - newErr += localErr - if localErr > maxErr: - maxErr = localErr - if useAvgErr == 1: - newErr = newErr / nExamples - else: - newErr = maxErr - # print('\t',newErr,errTol) - - if newErr <= errTol: - converged = 1 - - -# if cycle % 10 == 0 and not silent: - if not silent: - print('epoch %d, error: % 6.4f' % (cycle, newErr)) - - cycle = cycle + 1 - if not silent: - if converged: - print('Converged after %d epochs.' % cycle) - else: - print('NOT Converged after %d epochs.' % cycle) - print('final error: % 6.4f' % newErr) - - def __init__(self, speed=0.5, momentum=0.7): - """ Constructor - - **Arguments** - - - speed: the speed parameter for back prop training - - - momentum: the momentum term for back prop training - *Not currently used* - - """ - self.speed = speed - self.momentum = momentum - self.oldDeltaW = None - -if __name__ == '__main__': # pragma: nocover - from rdkit.ML.Neural import Network - - def testAnd(): - examples = [[[0, 0, 1], [0.1]], [[0, 1, 1], [.1]], [[1, 0, 1], [.1]], [[1, 1, 1], [.9]]] - net = Network.Network([3, 1]) - t = BackProp() - t.TrainOnLine(examples, net) - return net - - def testOr(): - examples = [[[0, 0, 1], [0.1]], [[0, 1, 1], [.9]], [[1, 0, 1], [.9]], [[1, 1, 1], [.9]]] - net = Network.Network([3, 1]) - t = BackProp() - t.TrainOnLine(examples, net, maxIts=1000, useAvgErr=0) - print('classifications:') - for example in examples: - res = net.ClassifyExample(example[0]) - print('%f -> %f' % (example[1][0], res)) - - return net - - def testXor(): - examples = [[[0, 0, 1], [.1]], [[0, 1, 1], [.9]], [[1, 0, 1], [.9]], [[1, 1, 1], [.1]]] - net = Network.Network([3, 3, 1]) - - t = BackProp(speed=.8) - t.TrainOnLine(examples, net, errTol=0.2) - return net - - def testLinear(): - examples = [ - [.1, .1], - [.2, .2], - [.3, .3], - [.4, .4], - [.8, .8], - ] - net = Network.Network([1, 2, 1]) - t = BackProp(speed=.8) - t.TrainOnLine(examples, net, errTol=0.1, useAvgErr=0) - print('classifications:') - for example in examples: - res = net.ClassifyExample(example[:-1]) - print('%f -> %f' % (example[-1], res)) - - return net - - def runProfile(command): - import random - random.seed(23) - import profile - import pstats - datFile = '%s.prof.dat' % (command) - profile.run('%s()' % command, datFile) - stats = pstats.Stats(datFile) - stats.strip_dirs() - stats.sort_stats('time').print_stats() - - if 0: - net = testXor() - print('Xor:', net) - import pickle - outF = open('xornet.pkl', 'wb+') - pickle.dump(net, outF) - outF.close() - else: - # runProfile('testLinear') - net = testLinear() - # net = testOr() diff --git a/rdkit/ML/Neural/UnitTestOther.py b/rdkit/ML/Neural/UnitTestOther.py deleted file mode 100755 index 953c892a7f9..00000000000 --- a/rdkit/ML/Neural/UnitTestOther.py +++ /dev/null @@ -1,80 +0,0 @@ -# -# Copyright (C) 2000 greg Landrum -# -""" unit tests for the Neural network trainer implementation - - this basically works out **all** of the network code - -""" - -import unittest - -from rdkit.ML.Neural.ActFuncs import Sigmoid, TanH -from rdkit.ML.Neural.NetNode import NetNode -from rdkit.ML.Neural.Network import Network - - -class TestCaseActFuncs(unittest.TestCase): - - def test_Sigmoid(self): - f = Sigmoid() - self.assertAlmostEqual(f(0), 0.5) - self.assertAlmostEqual(f(0), f.Eval(0)) - self.assertAlmostEqual(f.Deriv(0), 0.25) - self.assertAlmostEqual(f(1), 1.0 - f(-1)) - self.assertAlmostEqual(f(2), 1.0 - f(-2)) - self.assertAlmostEqual(f.Deriv(1), f.Deriv(-1)) - self.assertAlmostEqual(f.Deriv(2), f.Deriv(-2)) - self.assertLess(f(1), f(2)) - self.assertLess(f.Deriv(2), f.Deriv(1)) - self.assertAlmostEqual(f.Deriv(1), f.DerivFromVal(f(1))) - - def test_TanH(self): - f = TanH() - self.assertAlmostEqual(f(0), 0.0) - self.assertAlmostEqual(f(0), f.Eval(0)) - self.assertAlmostEqual(f.Deriv(0), 1.0) - self.assertAlmostEqual(f(1), -f(-1)) - self.assertAlmostEqual(f(2), -f(-2)) - self.assertAlmostEqual(f.Deriv(1), f.Deriv(-1)) - self.assertAlmostEqual(f.Deriv(2), f.Deriv(-2)) - self.assertLess(f(1), f(2)) - self.assertLess(f.Deriv(2), f.Deriv(1)) - self.assertAlmostEqual(f.Deriv(1), f.DerivFromVal(f(1))) - - -class TestCaseNetNode(unittest.TestCase): - - def test_NetNode(self): - # A node without input always returns 1 - nodeList = [None] * 2 - node = NetNode(0, nodeList) - nodeList[0] = node - valVect = [None] * 2 - self.assertEqual(node.Eval(valVect), 1) - self.assertEqual(valVect, [1, None]) - - node = NetNode(1, nodeList, inputNodes=[0], weights=[0.1]) - self.assertRaises(AssertionError, node.SetWeights, [0, 1]) - self.assertRaises(AssertionError, node.SetInputs, [0, 1]) - - -class TestCaseNetwork(unittest.TestCase): - - def test_Network(self): - nodeCounts = [2, 2, 1, 2] - net = Network(nodeCounts) - self.assertEqual(net.GetNumNodes(), 7) - self.assertEqual(len(net.GetAllNodes()), 7) - self.assertEqual(net.GetInputNodeList(), [0, 1]) - self.assertEqual(net.GetHiddenLayerNodeList(0), [2, 3]) - self.assertEqual(net.GetHiddenLayerNodeList(1), [4]) - self.assertEqual(net.GetOutputNodeList(), [5, 6]) - - # We get a representation of the network - s = str(net) - self.assertIn('Network', s) - - -if __name__ == '__main__': # pragma: nocover - unittest.main() diff --git a/rdkit/ML/Neural/UnitTestTrainer.py b/rdkit/ML/Neural/UnitTestTrainer.py deleted file mode 100755 index e319fced632..00000000000 --- a/rdkit/ML/Neural/UnitTestTrainer.py +++ /dev/null @@ -1,86 +0,0 @@ -# -# Copyright (C) 2000 greg Landrum -# -""" unit tests for the Neural network trainer implementation - - this basically works out **all** of the network code - -""" - -import random -import unittest -from io import StringIO - -from rdkit.ML.Neural import Network, Trainers -from rdkit.ML.Neural.CrossValidate import CrossValidate, CrossValidationDriver -from rdkit.TestRunner import redirect_stdout - - -class TrainerTestCase(unittest.TestCase): - - def setUp(self): - random.seed(23) - self.trainTol = 0.3 - self.orExamples = [[0, 0, 1, 0.1], [0, 1, 1, .9], [1, 0, 1, .9], [1, 1, 1, .9]] - self.andExamples = [[0, 0, 1, 0.1], [0, 1, 1, .1], [1, 0, 1, .1], [1, 1, 1, .9]] - self.xorExamples = [[0, 0, 1, .1], [0, 1, 1, .9], [1, 0, 1, .9], [1, 1, 1, .1]] - self.linExamples = [[.1, .1], [.2, .2], [.3, .3], [.4, .4], [.8, .8]] - - def _trainExamples(self, ex, arch=[3, 1], useAvgErr=False): - net = Network.Network(arch) - t = Trainers.BackProp() - t.TrainOnLine(ex, net, errTol=self.trainTol, useAvgErr=useAvgErr, silent=True) - errs = [abs(x[-1] - net.ClassifyExample(x)) for x in ex] - return net, errs - - def testBackpropOr(self): - # " testing backprop training on or " - _, errs = self._trainExamples(self.orExamples) - assert max(errs) < self.trainTol, 'net did not converge properly on or' - - def testBackpropAnd(self): - # " testing backprop training on and " - _, errs = self._trainExamples(self.andExamples) - assert max(errs) < self.trainTol, 'net did not converge properly on and' - - def testBackpropLin(self): - # " testing backprop training on a linear function " - _, errs = self._trainExamples(self.linExamples, arch=[1, 2, 1]) - assert max(errs) < self.trainTol, 'net did not converge properly on linear fit' - - _, errs = self._trainExamples(self.linExamples, arch=[1, 2, 1], useAvgErr=True) - assert max(errs) < 0.4, 'net did not converge properly on or' - - def test_multipleHiddenLayers(self): - _, errs = self._trainExamples(self.linExamples, arch=[1, 1, 2, 1]) - assert max(errs) < self.trainTol, 'net did not converge properly on linear fit' - - def test_CrossValidate(self): - # We just check here that the code works - net, _ = self._trainExamples(self.orExamples) - percentage, badExamples = CrossValidate(net, self.orExamples, 0.2) - self.assertEqual(percentage, 1.0 / 4) - self.assertEqual(len(badExamples), 1) - - percentage, badExamples = CrossValidate(net, self.orExamples, self.trainTol) - self.assertEqual(percentage, 0.0) - self.assertEqual(len(badExamples), 0) - - net, cvError = CrossValidationDriver(self.orExamples + self.orExamples, silent=True) - self.assertEqual(cvError, 0.5) - - net, cvError = CrossValidationDriver(self.orExamples + self.orExamples, silent=True, - replacementSelection=True) - self.assertEqual(cvError, 0.0) - - net, cvError = CrossValidationDriver(self.orExamples + self.orExamples, silent=True, - calcTotalError=True) - self.assertEqual(cvError, 0.25) - - f = StringIO() - with redirect_stdout(f): - CrossValidationDriver(self.orExamples + self.orExamples) - - -if __name__ == '__main__': # pragma: nocover - unittest.main() diff --git a/rdkit/ML/Neural/__init__.py b/rdkit/ML/Neural/__init__.py deleted file mode 100644 index eeba8e62bbd..00000000000 --- a/rdkit/ML/Neural/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -from warnings import warn - -warn('This module is deprecated and will be removed in the 2024.03 release', DeprecationWarning, - stacklevel=2) diff --git a/rdkit/ML/Neural/architecture.txt b/rdkit/ML/Neural/architecture.txt deleted file mode 100644 index dfe877fefa0..00000000000 --- a/rdkit/ML/Neural/architecture.txt +++ /dev/null @@ -1,20 +0,0 @@ -# -# Copyright (C) 2000 greg Landrum -# - -Network Architecture: - -A tacit assumption in all of this stuff is that we're dealing with -feedforward networks. - -The network itself is stored as a list of NetNode objects. The list -is ordered in the sense that nodes in earlier/later layers than a -given node are guaranteed to come before/after that node in the list. -This way we can easily generate the values of each node by moving -sequentially through the list, we're guaranteed that every input for a -node has already been filled in. - -Each node stores a list (inputNodes) of indices of its inputs in the -main node list. - - diff --git a/rdkit/ML/ScreenComposite.py b/rdkit/ML/ScreenComposite.py deleted file mode 100755 index 87148364a46..00000000000 --- a/rdkit/ML/ScreenComposite.py +++ /dev/null @@ -1,1647 +0,0 @@ -# $Id$ -# -# Copyright (C) 2000-2008 greg Landrum and Rational Discovery LLC -# -# @@ All Rights Reserved @@ -# This file is part of the RDKit. -# The contents are covered by the terms of the BSD license -# which is included in the file license.txt, found at the root -# of the RDKit source tree. -# -""" command line utility for screening composite models - -**Usage** - - _ScreenComposite [optional args] modelfile(s) datafile_ - -Unless indicated otherwise (via command line arguments), _modelfile_ is -a file containing a pickled composite model and _filename_ is a QDAT file. - -**Command Line Arguments** - - - -t *threshold value(s)*: use high-confidence predictions for the final - analysis of the hold-out data. The threshold value can be either a single - float or a list/tuple of floats. All thresholds should be between - 0.0 and 1.0 - - - -D: do a detailed screen. - - - -d *database name*: instead of reading the data from a QDAT file, - pull it from a database. In this case, the _datafile_ argument - provides the name of the database table containing the data set. - - - -N *note*: use all models from the database which have this note. - The modelfile argument should contain the name of the table - with the models. - - - -H: screen only the hold out set (works only if a version of - BuildComposite more recent than 1.2.2 was used). - - - -T: screen only the training set (works only if a version of - BuildComposite more recent than 1.2.2 was used). - - - -E: do a detailed Error analysis. This shows each misclassified - point and the number of times it was missed across all screened - composites. If the --enrich argument is also provided, only compounds - that have true activity value equal to the enrichment value will be - used. - - - --enrich *enrichVal*: target "active" value to be used in calculating - enrichments. - - - -A: show All predictions. - - - -S: shuffle activity values before screening - - - -R: randomize activity values before screening - - - -F *filter frac*: filters the data before training to change the - distribution of activity values in the training set. *filter frac* - is the fraction of the training set that should have the target value. - **See note in BuildComposite help about data filtering** - - - -v *filter value*: filters the data before training to change the - distribution of activity values in the training set. *filter value* - is the target value to use in filtering. - **See note in BuildComposite help about data filtering** - - - -V: be verbose when screening multiple models - - - -h: show this message and exit - - - --OOB: Do out an "out-of-bag" generalization error estimate. This only - makes sense when applied to the original data set. - - - --pickleCol *colId*: index of the column containing a pickled value - (used primarily for cases where fingerprints are used as descriptors) - - *** Options for making Prediction (Hanneke) Plots *** - - - --predPlot=: triggers the generation of a Hanneke plot and - sets the name of the .txt file which will hold the output data. - A Gnuplot control file, .gnu, will also be generated. - - - --predActTable= (optional): name of the database table - containing activity values. If this is not provided, activities - will be read from the same table containing the screening data - - - --predActCol= (optional): name of the activity column. If not - provided, the name of the last column in the activity table will - be used. - - - --predLogScale (optional): If provided, the x axis of the - prediction plot (the activity axis) will be plotted using a log - scale - - - --predShow: launch a gnuplot instance and display the prediction - plot (the plot will still be written to disk). - - *** The following options are likely obsolete *** - - - -P: read pickled data. The datafile argument should contain - a pickled data set. *relevant only to qdat files* - - - -q: data are not quantized (the composite should take care of - quantization itself if it requires quantized data). *relevant only to - qdat files* - - - -""" -from warnings import warn - -warn('This module is deprecated and will be removed in the 2024.03 release', DeprecationWarning, - stacklevel=2) - -import os -import pickle -import sys - -import numpy - -from rdkit import DataStructs -from rdkit.Dbase import DbModule -from rdkit.Dbase.DbConnection import DbConnect -from rdkit.ML import CompositeRun -from rdkit.ML.Data import DataUtils, SplitData - -try: - from PIL import Image, ImageDraw -except ImportError: - hasPil = 0 -else: - hasPil = 1 - -_details = CompositeRun.CompositeRun() - -__VERSION_STRING = "3.3.0" - - -def message(msg, noRet=0): - """ emits messages to _sys.stdout_ - override this in modules which import this one to redirect output - - **Arguments** - - - msg: the string to be displayed - - """ - if noRet: - sys.stdout.write('%s ' % (msg)) - else: - sys.stdout.write('%s\n' % (msg)) - - -def error(msg): - """ emits messages to _sys.stderr_ - override this in modules which import this one to redirect output - - **Arguments** - - - msg: the string to be displayed - - """ - sys.stderr.write('ERROR: %s\n' % (msg)) - - -def CalcEnrichment(mat, tgt=1): - if tgt < 0 or tgt >= mat.shape[0]: - return 0 - nPts = float(sum(sum(mat))) - nTgtPred = float(sum(mat[:, tgt])) - if nTgtPred: - pctCorrect = mat[tgt, tgt] / nTgtPred - nTgtReal = float(sum(mat[tgt, :])) - pctOverall = nTgtReal / nPts - else: - return 0.0 - return pctCorrect / pctOverall - - -def CollectResults(indices, dataSet, composite, callback=None, appendExamples=0, errorEstimate=0): - """ screens a set of examples through a composite and returns the - results -#DOC - - **Arguments** - - - examples: the examples to be screened (a sequence of sequences) - it's assumed that the last element in each example is it's "value" - - - composite: the composite model to be used - - - callback: (optional) if provided, this should be a function - taking a single argument that is called after each example is - screened with the number of examples screened so far as the - argument. - - - appendExamples: (optional) this value is passed on to the - composite's _ClassifyExample()_ method. - - - errorEstimate: (optional) calculate the "out of bag" error - estimate for the composite using Breiman's definition. This - only makes sense when screening the original data set! - [L. Breiman "Out-of-bag Estimation", UC Berkeley Dept of - Statistics Technical Report (1996)] - - **Returns** - - a list of 3-tuples _nExamples_ long: - - 1) answer: the value from the example - - 2) pred: the composite model's prediction - - 3) conf: the confidence of the composite - - """ - # for i in range(len(composite)): - # print(' ',i,'TRAIN:',composite[i][0]._trainIndices) - - for j in range(len(composite)): - tmp = composite.GetModel(j) - if hasattr(tmp, '_trainIndices') and type(tmp._trainIndices) != dict: - tis = {} - if hasattr(tmp, '_trainIndices'): - for v in tmp._trainIndices: - tis[v] = 1 - tmp._trainIndices = tis - - nPts = len(indices) - res = [None] * nPts - for i in range(nPts): - idx = indices[i] - example = dataSet[idx] - if errorEstimate: - use = [] - for j in range(len(composite)): - mdl = composite.GetModel(j) - if not mdl._trainIndices.get(idx, 0): - use.append(j) - else: - use = None - # print('IDX:',idx,'use:',use ) - pred, conf = composite.ClassifyExample(example, appendExample=appendExamples, onlyModels=use) - if composite.GetActivityQuantBounds(): - answer = composite.QuantizeActivity(example)[-1] - else: - answer = example[-1] - res[i] = answer, pred, conf - if callback: - callback(i) - return res - - -def DetailedScreen(indices, data, composite, threshold=0, screenResults=None, goodVotes=None, - badVotes=None, noVotes=None, callback=None, appendExamples=0, errorEstimate=0): - """ screens a set of examples cross a composite and breaks the - predictions into *correct*,*incorrect* and *unclassified* sets. -#DOC - **Arguments** - - - examples: the examples to be screened (a sequence of sequences) - it's assumed that the last element in each example is its "value" - - - composite: the composite model to be used - - - threshold: (optional) the threshold to be used to decide whether - or not a given prediction should be kept - - - screenResults: (optional) the results of screening the results - (a sequence of 3-tuples in the format returned by - _CollectResults()_). If this is provided, the examples will not - be screened again. - - - goodVotes,badVotes,noVotes: (optional) if provided these should - be lists (or anything supporting an _append()_ method) which - will be used to pass the screening results back. - - - callback: (optional) if provided, this should be a function - taking a single argument that is called after each example is - screened with the number of examples screened so far as the - argument. - - - appendExamples: (optional) this value is passed on to the - composite's _ClassifyExample()_ method. - - - errorEstimate: (optional) calculate the "out of bag" error - estimate for the composite using Breiman's definition. This - only makes sense when screening the original data set! - [L. Breiman "Out-of-bag Estimation", UC Berkeley Dept of - Statistics Technical Report (1996)] - - **Notes** - - - since this function doesn't return anything, if one or more of - the arguments _goodVotes_, _badVotes_, and _noVotes_ is not - provided, there's not much reason to call it - - """ - if screenResults is None: - screenResults = CollectResults(indices, data, composite, callback=callback, - appendExamples=appendExamples, errorEstimate=errorEstimate) - if goodVotes is None: - goodVotes = [] - if badVotes is None: - badVotes = [] - if noVotes is None: - noVotes = [] - for i in range(len(screenResults)): - answer, pred, conf = screenResults[i] - if conf > threshold: - if pred != answer: - badVotes.append((answer, pred, conf, i)) - else: - goodVotes.append((answer, pred, conf, i)) - else: - noVotes.append((answer, pred, conf, i)) - - -def ShowVoteResults(indices, data, composite, nResultCodes, threshold, verbose=1, - screenResults=None, callback=None, appendExamples=0, goodVotes=None, - badVotes=None, noVotes=None, errorEstimate=0): - """ screens the results and shows a detailed workup - - The work of doing the screening and processing the results is - handled by _DetailedScreen()_ -#DOC - - **Arguments** - - - examples: the examples to be screened (a sequence of sequences) - it's assumed that the last element in each example is its "value" - - - composite: the composite model to be used - - - nResultCodes: the number of possible results the composite can - return - - - threshold: the threshold to be used to decide whether or not a - given prediction should be kept - - - screenResults: (optional) the results of screening the results - (a sequence of 3-tuples in the format returned by - _CollectResults()_). If this is provided, the examples will not - be screened again. - - - callback: (optional) if provided, this should be a function - taking a single argument that is called after each example is - screened with the number of examples screened so far as the - argument. - - - appendExamples: (optional) this value is passed on to the - composite's _ClassifyExample()_ method. - - - goodVotes,badVotes,noVotes: (optional) if provided these should - be lists (or anything supporting an _append()_ method) which - will be used to pass the screening results back. - - - errorEstimate: (optional) calculate the "out of bag" error - estimate for the composite using Breiman's definition. This - only makes sense when screening the original data set! - [L. Breiman "Out-of-bag Estimation", UC Berkeley Dept of - Statistics Technical Report (1996)] - - **Returns** - - a 7-tuple: - - 1) the number of good (correct) predictions - - 2) the number of bad (incorrect) predictions - - 3) the number of predictions skipped due to the _threshold_ - - 4) the average confidence in the good predictions - - 5) the average confidence in the bad predictions - - 6) the average confidence in the skipped predictions - - 7) the results table - - """ - nExamples = len(indices) - if goodVotes is None: - goodVotes = [] - if badVotes is None: - badVotes = [] - if noVotes is None: - noVotes = [] - DetailedScreen(indices, data, composite, threshold, screenResults=screenResults, - goodVotes=goodVotes, badVotes=badVotes, noVotes=noVotes, callback=callback, - appendExamples=appendExamples, errorEstimate=errorEstimate) - nBad = len(badVotes) - nGood = len(goodVotes) - nClassified = nGood + nBad - if verbose: - print('\n\t*** Vote Results ***') - print('misclassified: %d/%d (%%%4.2f)\t%d/%d (%%%4.2f)' % - (nBad, nExamples, 100. * float(nBad) / nExamples, nBad, nClassified, - 100. * float(nBad) / nClassified)) - nSkip = len(noVotes) - if nSkip > 0: - if verbose: - print('skipped: %d/%d (%%% 4.2f)' % (nSkip, nExamples, 100. * float(nSkip) / nExamples)) - noConf = numpy.array([x[2] for x in noVotes]) - avgSkip = sum(noConf) / float(nSkip) - else: - avgSkip = 0. - - if nBad > 0: - badConf = numpy.array([x[2] for x in badVotes]) - avgBad = sum(badConf) / float(nBad) - else: - avgBad = 0. - - if nGood > 0: - goodRes = [x[1] for x in goodVotes] - goodConf = numpy.array([x[2] for x in goodVotes]) - avgGood = sum(goodConf) / float(nGood) - else: - goodRes = [] - goodConf = [] - avgGood = 0. - - if verbose: - print() - print('average correct confidence: % 6.4f' % avgGood) - print('average incorrect confidence: % 6.4f' % avgBad) - - voteTab = numpy.zeros((nResultCodes, nResultCodes), numpy.int32) - for res in goodRes: - voteTab[res, res] += 1 - for ans, res, conf, idx in badVotes: - voteTab[ans, res] += 1 - - if verbose: - print() - print('\tResults Table:') - vTab = voteTab.transpose() - colCounts = numpy.sum(vTab, 0) - rowCounts = numpy.sum(vTab, 1) - message('') - for i in range(nResultCodes): - if rowCounts[i] == 0: - rowCounts[i] = 1 - row = vTab[i] - message(' ', noRet=1) - for j in range(nResultCodes): - entry = row[j] - message(' % 6d' % entry, noRet=1) - message(' | % 4.2f' % (100. * vTab[i, i] / rowCounts[i])) - message(' ', noRet=1) - for i in range(nResultCodes): - message('-------', noRet=1) - message('') - message(' ', noRet=1) - for i in range(nResultCodes): - if colCounts[i] == 0: - colCounts[i] = 1 - message(' % 6.2f' % (100. * vTab[i, i] / colCounts[i]), noRet=1) - message('') - - return nGood, nBad, nSkip, avgGood, avgBad, avgSkip, voteTab - - -def ScreenIt(composite, indices, data, partialVote=0, voteTol=0.0, verbose=1, screenResults=None, - goodVotes=None, badVotes=None, noVotes=None): - """ screens a set of data using a composite model and prints out - statistics about the screen. -#DOC - The work of doing the screening and processing the results is - handled by _DetailedScreen()_ - - **Arguments** - - - composite: the composite model to be used - - - data: the examples to be screened (a sequence of sequences) - it's assumed that the last element in each example is its "value" - - - partialVote: (optional) toggles use of the threshold value in - the screnning. - - - voteTol: (optional) the threshold to be used to decide whether or not a - given prediction should be kept - - - verbose: (optional) sets degree of verbosity of the screening - - - screenResults: (optional) the results of screening the results - (a sequence of 3-tuples in the format returned by - _CollectResults()_). If this is provided, the examples will not - be screened again. - - - goodVotes,badVotes,noVotes: (optional) if provided these should - be lists (or anything supporting an _append()_ method) which - will be used to pass the screening results back. - - - **Returns** - - a 7-tuple: - - 1) the number of good (correct) predictions - - 2) the number of bad (incorrect) predictions - - 3) the number of predictions skipped due to the _threshold_ - - 4) the average confidence in the good predictions - - 5) the average confidence in the bad predictions - - 6) the average confidence in the skipped predictions - - 7) None - - """ - if goodVotes is None: - goodVotes = [] - if badVotes is None: - badVotes = [] - if noVotes is None: - noVotes = [] - - if not partialVote: - voteTol = 0.0 - - DetailedScreen(indices, data, composite, voteTol, screenResults=screenResults, - goodVotes=goodVotes, badVotes=badVotes, noVotes=noVotes) - - nGood = len(goodVotes) - goodAccum = 0. - for res, pred, conf, idx in goodVotes: - goodAccum += conf - - misCount = len(badVotes) - badAccum = 0. - for res, pred, conf, idx in badVotes: - badAccum += conf - - nSkipped = len(noVotes) - goodSkipped = 0 - badSkipped = 0 - skipAccum = 0. - for ans, pred, conf, idx in noVotes: - skipAccum += conf - if ans != pred: - badSkipped += 1 - else: - goodSkipped += 1 - - nData = nGood + misCount + nSkipped - if verbose: - print('Total N Points:', nData) - if partialVote: - nCounted = nData - nSkipped - if verbose: - print('Misclassifications: %d (%%%4.2f)' % (misCount, 100. * float(misCount) / nCounted)) - print('N Skipped: %d (%%%4.2f)' % (nSkipped, 100. * float(nSkipped) / nData)) - print('\tGood Votes Skipped: %d (%%%4.2f)' % - (goodSkipped, 100. * float(goodSkipped) / nSkipped)) - print('\tBad Votes Skipped: %d (%%%4.2f)' % (badSkipped, 100. * float(badSkipped) / nSkipped)) - else: - if verbose: - print('Misclassifications: %d (%%%4.2f)' % (misCount, 100. * float(misCount) / nData)) - print('Average Correct Vote Confidence: % 6.4f' % (goodAccum / (nData - misCount))) - print('Average InCorrect Vote Confidence: % 6.4f' % (badAccum / misCount)) - - avgGood = 0 - avgBad = 0 - avgSkip = 0 - if nGood: - avgGood = goodAccum / nGood - if misCount: - avgBad = badAccum / misCount - if nSkipped: - avgSkip = skipAccum / nSkipped - return nGood, misCount, nSkipped, avgGood, avgBad, avgSkip, None - - -def _processVoteList(votes, data): - """ *Internal Use Only* - - converts a list of 4 tuples: (answer,prediction,confidence,idx) into - an alternate list: (answer,prediction,confidence,data point) - - **Arguments** - - - votes: a list of 4 tuples: (answer, prediction, confidence, - index) - - - data: a _DataUtils.MLData.MLDataSet_ - - - **Note**: alterations are done in place in the _votes_ list - - """ - for i in range(len(votes)): - ans, pred, conf, idx = votes[i] - votes[i] = (ans, pred, conf, data[idx]) - - -def PrepareDataFromDetails(model, details, data, verbose=0): - if (hasattr(details, 'doHoldout') and details.doHoldout) or \ - (hasattr(details, 'doTraining') and details.doTraining): - try: - splitF = model._splitFrac - except AttributeError: - pass - else: - if verbose: - message('s', noRet=1) - - if hasattr(details, 'errorEstimate') and details.errorEstimate and \ - hasattr(details, 'doHoldout') and details.doHoldout: - message('*!*!*!*!*!*!*!*!*!*!*!*!*!*!*!*!*!*!*!*') - message('****** WARNING: OOB screening should not be combined with doHoldout option.') - message('*!*!*!*!*!*!*!*!*!*!*!*!*!*!*!*!*!*!*!*') - trainIdx, testIdx = SplitData.SplitIndices(data.GetNPts(), splitF, silent=1) - - if hasattr(details, 'filterFrac') and details.filterFrac != 0.0: - if verbose: - message('f', noRet=1) - trainFilt, temp = DataUtils.FilterData(data, details.filterVal, details.filterFrac, -1, - indicesToUse=trainIdx, indicesOnly=1) - testIdx += temp - trainIdx = trainFilt - elif hasattr(details, 'errorEstimate') and details.errorEstimate: - # the OOB screening works by checking to see if a given index - # is in the - if hasattr(details, 'filterFrac') and details.filterFrac != 0.0: - if verbose: - message('f', noRet=1) - testIdx, trainIdx = DataUtils.FilterData(data, details.filterVal, details.filterFrac, -1, - indicesToUse=range(data.GetNPts()), indicesOnly=1) - testIdx.extend(trainIdx) - else: - testIdx = list(range(data.GetNPts())) - trainIdx = [] - else: - testIdx = list(range(data.GetNPts())) - trainIdx = [] - if hasattr(details, 'doTraining') and details.doTraining: - testIdx, trainIdx = trainIdx, testIdx - return trainIdx, testIdx - - -def ScreenFromDetails(models, details, callback=None, setup=None, appendExamples=0, goodVotes=None, - badVotes=None, noVotes=None, data=None, enrichments=None): - """ Screens a set of data using a a _CompositeRun.CompositeRun_ - instance to provide parameters - -# DOC - - The actual data to be used are extracted from the database and - table specified in _details_ - - Aside from dataset construction, _ShowVoteResults()_ does most of - the heavy lifting here. - - **Arguments** - - - model: a composite model - - - details: a _CompositeRun.CompositeRun_ object containing details - (options, parameters, etc.) about the run - - - callback: (optional) if provided, this should be a function - taking a single argument that is called after each example is - screened with the number of examples screened so far as the - argument. - - - setup: (optional) a function taking a single argument which is - called at the start of screening with the number of points to - be screened as the argument. - - - appendExamples: (optional) this value is passed on to the - composite's _ClassifyExample()_ method. - - - goodVotes,badVotes,noVotes: (optional) if provided these should - be lists (or anything supporting an _append()_ method) which - will be used to pass the screening results back. - - - **Returns** - - a 7-tuple: - - 1) the number of good (correct) predictions - - 2) the number of bad (incorrect) predictions - - 3) the number of predictions skipped due to the _threshold_ - - 4) the average confidence in the good predictions - - 5) the average confidence in the bad predictions - - 6) the average confidence in the skipped predictions - - 7) the results table - - """ - if data is None: - if hasattr(details, 'pickleCol'): - data = details.GetDataSet(pickleCol=details.pickleCol, - pickleClass=DataStructs.ExplicitBitVect) - else: - data = details.GetDataSet() - if details.threshold > 0.0: - details.partialVote = 1 - else: - details.partialVote = 0 - - if type(models) not in [list, tuple]: - models = (models, ) - - nModels = len(models) - - if setup is not None: - setup(nModels * data.GetNPts()) - - nGood = numpy.zeros(nModels, float) - nBad = numpy.zeros(nModels, float) - nSkip = numpy.zeros(nModels, float) - confGood = numpy.zeros(nModels, float) - confBad = numpy.zeros(nModels, float) - confSkip = numpy.zeros(nModels, float) - voteTab = None - if goodVotes is None: - goodVotes = [] - if badVotes is None: - badVotes = [] - if noVotes is None: - noVotes = [] - if enrichments is None: - enrichments = [0.0] * nModels - badVoteDict = {} - noVoteDict = {} - - for i in range(nModels): - if nModels > 1: - goodVotes = [] - badVotes = [] - noVotes = [] - model = models[i] - - try: - seed = model._randomSeed - except AttributeError: - pass - else: - DataUtils.InitRandomNumbers(seed) - - if (hasattr(details, 'shuffleActivities') and details.shuffleActivities) or \ - (hasattr(details, 'randomActivities') and details.randomActivities): - if hasattr(details, 'shuffleActivities') and details.shuffleActivities: - shuffle = True - else: - shuffle = False - randomize = True - DataUtils.RandomizeActivities(data, shuffle=shuffle, runDetails=details) - else: - randomize = False - shuffle = False - - if hasattr(model, '_shuffleActivities') and \ - model._shuffleActivities and \ - not shuffle: - message('*!*!*!*!*!*!*!*!*!*!*!*!*!*!*!*!*!*!*!*') - message('****** WARNING: Shuffled model being screened with unshuffled data.') - message('*!*!*!*!*!*!*!*!*!*!*!*!*!*!*!*!*!*!*!*') - if hasattr(model, '_randomizeActivities') and \ - model._randomizeActivities and \ - not randomize: - message('*!*!*!*!*!*!*!*!*!*!*!*!*!*!*!*!*!*!*!*') - message('****** WARNING: Random model being screened with non-random data.') - message('*!*!*!*!*!*!*!*!*!*!*!*!*!*!*!*!*!*!*!*') - - trainIdx, testIdx = PrepareDataFromDetails(model, details, data) - - nPossible = model.GetQuantBounds()[1] - if callback: - cb = lambda x, y=callback, z=i * data.GetNPts(): y(x + z) - else: - cb = None - if not hasattr(details, 'errorEstimate') or not details.errorEstimate: - errorEstimate = 0 - else: - errorEstimate = 1 - g, b, s, aG, aB, aS, vT = ShowVoteResults(testIdx, data, model, nPossible[-1], - details.threshold, verbose=0, callback=cb, - appendExamples=appendExamples, goodVotes=goodVotes, - badVotes=badVotes, noVotes=noVotes, - errorEstimate=errorEstimate) - if voteTab is None: - voteTab = numpy.zeros(vT.shape, float) - if hasattr(details, 'errorAnalysis') and details.errorAnalysis: - for a, p, c, idx in badVotes: - label = testIdx[idx] - if hasattr(details, 'enrichTgt') and details.enrichTgt >= 0: - if a == details.enrichTgt: - badVoteDict[label] = badVoteDict.get(label, 0) + 1 - else: - badVoteDict[label] = badVoteDict.get(label, 0) + 1 - for a, p, c, idx in noVotes: - label = testIdx[idx] - if hasattr(details, 'enrichTgt') and details.enrichTgt >= 0: - if a == details.enrichTgt: - noVoteDict[label] = noVoteDict.get(label, 0) + 1 - else: - noVoteDict[label] = noVoteDict.get(label, 0) + 1 - - voteTab += vT - nGood[i] = g - nBad[i] = b - nSkip[i] = s - confGood[i] = aG - confBad[i] = aB - confSkip[i] = aS - - if hasattr(details, 'enrichTgt') and details.enrichTgt >= 0: - enrichments[i] = CalcEnrichment(vT, tgt=details.enrichTgt) - - if nModels == 1: - return g, b, s, aG, aB, aS, vT - else: - voteTab /= nModels - - avgNBad = sum(nBad) / nModels - devNBad = numpy.sqrt(sum((nBad - avgNBad)**2) / (nModels - 1)) - - # bestIdx = numpy.argsort(nBad)[0] - - avgNGood = sum(nGood) / nModels - devNGood = numpy.sqrt(sum((nGood - avgNGood)**2) / (nModels - 1)) - - avgNSkip = sum(nSkip) / nModels - devNSkip = numpy.sqrt(sum((nSkip - avgNSkip)**2) / (nModels - 1)) - - avgConfBad = sum(confBad) / nModels - devConfBad = numpy.sqrt(sum((confBad - avgConfBad)**2) / (nModels - 1)) - - avgConfGood = sum(confGood) / nModels - devConfGood = numpy.sqrt(sum((confGood - avgConfGood)**2) / (nModels - 1)) - - avgConfSkip = sum(confSkip) / nModels - devConfSkip = numpy.sqrt(sum((confSkip - avgConfSkip)**2) / (nModels - 1)) - return ((avgNGood, devNGood), (avgNBad, devNBad), (avgNSkip, devNSkip), - (avgConfGood, devConfGood), (avgConfBad, devConfBad), (avgConfSkip, - devConfSkip), voteTab) - - -def GetScreenImage(nGood, nBad, nRej, size=None): - if not hasPil: - return None - try: - nTot = float(nGood) + float(nBad) + float(nRej) - except TypeError: - nGood = nGood[0] - nBad = nBad[0] - nRej = nRej[0] - nTot = float(nGood) + float(nBad) + float(nRej) - - if not nTot: - return None - goodColor = (100, 100, 255) - badColor = (255, 100, 100) - rejColor = (255, 255, 100) - - pctGood = float(nGood) / nTot - pctBad = float(nBad) / nTot - pctRej = float(nRej) / nTot - - if size is None: - size = (100, 100) - img = Image.new('RGB', size, (255, 255, 255)) - draw = ImageDraw.Draw(img) - box = (0, 0, size[0] - 1, size[1] - 1) - - startP = -90 - endP = int(startP + pctGood * 360) - draw.pieslice(box, startP, endP, fill=goodColor) - startP = endP - endP = int(startP + pctBad * 360) - draw.pieslice(box, startP, endP, fill=badColor) - startP = endP - endP = int(startP + pctRej * 360) - draw.pieslice(box, startP, endP, fill=rejColor) - - return img - - -def ScreenToHtml(nGood, nBad, nRej, avgGood, avgBad, avgSkip, voteTable, imgDir='.', fullPage=1, - skipImg=0, includeDefs=1): - """ returns the text of a web page showing the screening details -#DOC - **Arguments** - - - nGood: number of correct predictions - - - nBad: number of incorrect predictions - - - nRej: number of rejected predictions - - - avgGood: average correct confidence - - - avgBad: average incorrect confidence - - - avgSkip: average rejected confidence - - - voteTable: vote table - - - imgDir: (optional) the directory to be used to hold the vote - image (if constructed) - - **Returns** - - a string containing HTML - - """ - if type(nGood) == tuple: - multModels = 1 - else: - multModels = 0 - - if fullPage: - outTxt = [""""""] - outTxt.append('

VOTE DETAILS

') - else: - outTxt = [] - - outTxt.append('') - - # Get the image - if not skipImg: - img = GetScreenImage(nGood, nBad, nRej) - if img: - if imgDir: - imgFileName = '/'.join((imgDir, 'votes.png')) - else: - imgFileName = 'votes.png' - img.save(imgFileName) - outTxt.append('
' % (imgFileName)) - - nPoss = len(voteTable) - pureCounts = numpy.sum(voteTable, 1) - accCounts = numpy.sum(voteTable, 0) - pureVect = numpy.zeros(nPoss, float) - accVect = numpy.zeros(nPoss, float) - for i in range(nPoss): - if pureCounts[i]: - pureVect[i] = float(voteTable[i, i]) / pureCounts[i] - if accCounts[i]: - accVect[i] = float(voteTable[i, i]) / accCounts[i] - - outTxt.append('
') - outTxt.append('') - for i in range(nPoss): - outTxt.append('' % i) - outTxt.append('') - outTxt.append('') - # outTxt.append(''%(nPoss+1)) - for i in range(nPoss): - outTxt.append('' % (i)) - for j in range(nPoss): - if i == j: - if not multModels: - outTxt.append('' % (voteTable[j, i])) - else: - outTxt.append('' % (voteTable[j, i])) - else: - if not multModels: - outTxt.append('' % (voteTable[j, i])) - else: - outTxt.append('' % (voteTable[j, i])) - outTxt.append('' % (nPoss)) - else: - outTxt.append('') - outTxt.append('') - for i in range(nPoss): - outTxt.append('' % (100.0 * pureVect[i])) - outTxt.append('') - outTxt.append('' % (nPoss)) - outTxt.append('
%d% Accurate
Predicted
%d%d%.2f%d%.2f%4.2f' % (100.0 * accVect[i])) - if i == 0: - outTxt.append('Predicted
% Pure%4.2f
Original
') - - if not multModels: - nTotal = nBad + nGood + nRej - nClass = nBad + nGood - if nClass: - pctErr = 100. * float(nBad) / nClass - else: - pctErr = 0.0 - - outTxt.append('

%d of %d examples were misclassified (%%%4.2f)' % - (nBad, nGood + nBad, pctErr)) - if nRej > 0: - pctErr = 100. * float(nBad) / (nGood + nBad + nRej) - outTxt.append('

%d of %d overall: (%%%4.2f)' % (nBad, nTotal, pctErr)) - pctRej = 100. * float(nRej) / nTotal - outTxt.append('

%d of %d examples were rejected (%%%4.2f)' % (nRej, nTotal, pctRej)) - if nGood != 0: - outTxt.append('

The correctly classified examples had an average confidence of %6.4f' % - avgGood) - - if nBad != 0: - outTxt.append('

The incorrectly classified examples had an average confidence of %6.4f' % - avgBad) - if nRej != 0: - outTxt.append('

The rejected examples had an average confidence of %6.4f' % avgSkip) - else: - nTotal = nBad[0] + nGood[0] + nRej[0] - nClass = nBad[0] + nGood[0] - devClass = nBad[1] + nGood[1] - if nClass: - pctErr = 100. * float(nBad[0]) / nClass - devPctErr = 100. * float(nBad[1]) / nClass - else: - pctErr = 0.0 - devPctErr = 0.0 - - outTxt.append('

%.2f(%.2f) of %.2f(%.2f) examples were misclassified (%%%4.2f(%4.2f))' % - (nBad[0], nBad[1], nClass, devClass, pctErr, devPctErr)) - if nRej > 0: - pctErr = 100. * float(nBad[0]) / nTotal - devPctErr = 100. * float(nBad[1]) / nTotal - outTxt.append('

%.2f(%.2f) of %d overall: (%%%4.2f(%4.2f))' % - (nBad[0], nBad[1], nTotal, pctErr, devPctErr)) - pctRej = 100. * float(nRej[0]) / nTotal - devPctRej = 100. * float(nRej[1]) / nTotal - outTxt.append('

%.2f(%.2f) of %d examples were rejected (%%%4.2f(%4.2f))' % - (nRej[0], nRej[1], nTotal, pctRej, devPctRej)) - if nGood != 0: - outTxt.append( - '

The correctly classified examples had an average confidence of %6.4f(%.4f)' % avgGood) - - if nBad != 0: - outTxt.append( - '

The incorrectly classified examples had an average confidence of %6.4f(%.4f)' % avgBad) - if nRej != 0: - outTxt.append('

The rejected examples had an average confidence of %6.4f(%.4f)' % avgSkip) - - outTxt.append('') - if includeDefs: - txt = """ -

Definitions: -

    -
  • % Pure: The percentage of, for example, known positives predicted to be positive. -
  • % Accurate: The percentage of, for example, predicted positives that actually - are positive. -
- """ - outTxt.append(txt) - - if fullPage: - outTxt.append("""""") - return '\n'.join(outTxt) - - -def MakePredPlot(details, indices, data, goodVotes, badVotes, nRes, idCol=0, verbose=0): - """ - - **Arguments** - - - details: a CompositeRun.RunDetails object - - - indices: a sequence of integer indices into _data_ - - - data: the data set in question. We assume that the ids for - the data points are in the _idCol_ column - - - goodVotes/badVotes: predictions where the model was correct/incorrect. - These are sequences of 4-tuples: - (answer,prediction,confidence,index into _indices_) - - """ - if not hasattr(details, 'predPlot') or not details.predPlot: - return - - if verbose: - message('\n-> Constructing Prediction (Hanneke) Plot') - outF = open(details.predPlot, 'w+') - gnuF = open('%s.gnu' % details.predPlot, 'w+') - # first get the ids of the data points we screened: - ptIds = [data[x][idCol] for x in indices] - - # get a connection to the database we'll use to grab the continuous - # activity values: - origConn = DbConnect(details.dbName, details.tableName, user=details.dbUser, - password=details.dbPassword) - colNames = origConn.GetColumnNames() - idName = colNames[idCol] - if not hasattr(details, 'predActTable') or \ - not details.predActTable or \ - details.predActTable == details.tableName: - actConn = origConn - else: - actConn = DbConnect(details.dbName, details.predActTable, user=details.dbUser, - password=details.dbPassword) - if verbose: - message('\t-> Pulling Activity Data') - - if type(ptIds[0]) not in [type(''), type(u'')]: - ptIds = [str(x) for x in ptIds] - whereL = [DbModule.placeHolder] * len(ptIds) - if hasattr(details, 'predActCol') and details.predActCol: - actColName = details.predActCol - else: - actColName = actConn.GetColumnNames()[-1] - - whereTxt = "%s in (%s)" % (idName, ','.join(whereL)) - rawD = actConn.GetData(fields='%s,%s' % (idName, actColName), where=whereTxt, extras=ptIds) - # order the data returned: - if verbose: - message('\t-> Creating Plot') - acts = [None] * len(ptIds) - for entry in rawD: - ID, act = entry - idx = ptIds.index(ID) - acts[idx] = act - outF.write('#ID Pred Conf %s\n' % (actColName)) - for ans, pred, conf, idx in goodVotes: - act = acts[idx] - if act != 'None': - act = float(act) - else: - act = 0 - outF.write('%s %d %.4f %f\n' % (ptIds[idx], pred, conf, act)) - for ans, pred, conf, idx in badVotes: - act = acts[idx] - if act != 'None': - act = float(act) - else: - act = 0 - outF.write('%s %d %.4f %f\n' % (ptIds[idx], pred, conf, act)) - outF.close() - if not hasattr(details, 'predLogScale') or not details.predLogScale: - actLabel = actColName - else: - actLabel = 'log(%s)' % (actColName) - actLabel = actLabel.replace('_', ' ') - gnuHdr = """# Generated by ScreenComposite.py version: %s - set size square 0.7 - set yrange [:1] - set data styl points - set ylab 'confidence' - set xlab '%s' - set grid - set nokey - set term postscript enh color solid "Helvetica" 16 - set term X - """ % (__VERSION_STRING, actLabel) - gnuF.write(gnuHdr) - plots = [] - for i in range(nRes): - if not hasattr(details, 'predLogScale') or not details.predLogScale: - plots.append("'%s' us 4:($2==%d?$3:0/0)" % (details.predPlot, i)) - else: - plots.append("'%s' us (log10($4)):($2==%d?$3:0/0)" % (details.predPlot, i)) - gnuF.write("plot %s\n" % (','.join(plots))) - gnuTail = """ - # EOF - """ - gnuF.write(gnuTail) - gnuF.close() - if hasattr(details, 'predShow') and details.predShow: - try: - try: - from Gnuplot import Gnuplot - except ImportError: - raise ImportError('Functionality requires the Gnuplot module') - p = Gnuplot() - p('cd "%s"' % (os.getcwd())) - p('load "%s.gnu"' % (details.predPlot)) - input('press return to continue...\n') - except Exception: - import traceback - traceback.print_exc() - - -def Go(details): - pass - - -def SetDefaults(details=None): - global _details - if details is None: - details = _details - CompositeRun.SetDefaults(details) - details.screenVoteTol = [0.] - details.detailedScreen = 0 - details.doHoldout = 0 - details.doTraining = 0 - details.errorAnalysis = 0 - details.verbose = 0 - details.partialVote = 0 - return details - - -def Usage(): - """ prints a list of arguments for when this is used from the - command line and then exits - - """ - print(__doc__) - sys.exit(-1) - - -def ShowVersion(includeArgs=0): - """ prints the version number of the program - - """ - print('This is ScreenComposite.py version %s' % (__VERSION_STRING)) - if includeArgs: - print('command line was:') - print(' '.join(sys.argv)) - - -def ParseArgs(details): - import getopt - try: - args, extras = getopt.getopt(sys.argv[1:], 'EDd:t:VN:HThSRF:v:AX', [ - 'predPlot=', - 'predActCol=', - 'predActTable=', - 'predLogScale', - 'predShow', - 'OOB', - 'pickleCol=', - 'enrich=', - ]) - except Exception: - import traceback - traceback.print_exc() - Usage() - - details.predPlot = '' - details.predActCol = '' - details.predActTable = '' - details.predLogScale = '' - details.predShow = 0 - details.errorEstimate = 0 - details.pickleCol = -1 - details.enrichTgt = -1 - for arg, val in args: - if arg == '-d': - details.dbName = val - elif arg == '-D': - details.detailedScreen = 1 - elif arg == '-t': - details.partialVote = 1 - voteTol = eval(val) - if type(voteTol) not in [type([]), type((1, 1))]: - voteTol = [voteTol] - for tol in voteTol: - if tol > 1 or tol < 0: - error('Voting threshold must be between 0 and 1') - sys.exit(-2) - details.screenVoteTol = voteTol - elif arg == '-N': - details.note = val - elif arg == '-H': - details.doTraining = 0 - details.doHoldout = 1 - elif arg == '-T': - details.doHoldout = 0 - details.doTraining = 1 - elif arg == '-E': - details.errorAnalysis = 1 - details.detailedScreen = 1 - elif arg == '-A': - details.showAll = 1 - details.detailedScreen = 1 - elif arg == '-S': - details.shuffleActivities = 1 - elif arg == '-R': - details.randomActivities = 1 - elif arg == '-h': - Usage() - elif arg == '-F': - details.filterFrac = float(val) - elif arg == '-v': - details.filterVal = float(val) - elif arg == '-V': - verbose = 1 - elif arg == '--predPlot': - details.detailedScreen = 1 - details.predPlot = val - elif arg == '--predActCol': - details.predActCol = val - elif arg == '--predActTable': - details.predActTable = val - elif arg == '--predLogScale': - details.predLogScale = 1 - elif arg == '--predShow': - details.predShow = 1 - elif arg == '--predShow': - details.predShow = 1 - elif arg == '--OOB': - details.errorEstimate = 1 - elif arg == '--pickleCol': - details.pickleCol = int(val) - 1 - elif arg == '--enrich': - details.enrichTgt = int(val) - else: - Usage() - - if len(extras) < 1: - Usage() - return extras - - -if __name__ == '__main__': - details = SetDefaults() - extras = ParseArgs(details) - ShowVersion(includeArgs=1) - - models = [] - if details.note and details.dbName: - tblName = extras[0] - message('-> Retrieving models from database') - conn = DbConnect(details.dbName, tblName) - blobs = conn.GetData(fields='model', where="where note='%s'" % (details.note)) - for blob in blobs: - blob = blob[0] - try: - models.append(pickle.loads(str(blob))) - except Exception: - import traceback - traceback.print_exc() - message('Model load failed') - - else: - message('-> Loading model') - modelFile = open(extras[0], 'rb') - models.append(pickle.load(modelFile)) - if not len(models): - error('No composite models found') - sys.exit(-1) - else: - message('-> Working with %d models.' % len(models)) - - extras = extras[1:] - - for fName in extras: - if details.dbName != '': - details.tableName = fName - data = details.GetDataSet(pickleCol=details.pickleCol, - pickleClass=DataStructs.ExplicitBitVect) - else: - data = DataUtils.BuildDataSet(fName) - descNames = data.GetVarNames() - nModels = len(models) - screenResults = [None] * nModels - dataSets = [None] * nModels - message('-> Constructing and screening data sets') - testIdx = list(range(data.GetNPts())) - trainIdx = testIdx - - for modelIdx in range(nModels): - # tmpD = copy.deepcopy(data) - tmpD = data - model = models[modelIdx] - message('.', noRet=1) - - try: - seed = model._randomSeed - except AttributeError: - pass - else: - DataUtils.InitRandomNumbers(seed) - - if details.shuffleActivities or details.randomActivities: - shuffle = details.shuffleActivities - randomize = 1 - DataUtils.RandomizeActivities(tmpD, shuffle=details.shuffleActivities, runDetails=details) - else: - randomize = False - shuffle = False - - if hasattr(model, '_shuffleActivities') and \ - model._shuffleActivities and \ - not shuffle: - message('*!*!*!*!*!*!*!*!*!*!*!*!*!*!*!*!*!*!*!*') - message('****** WARNING: Shuffled model being screened with unshuffled data.') - message('*!*!*!*!*!*!*!*!*!*!*!*!*!*!*!*!*!*!*!*') - if hasattr(model, '_randomizeActivities') and \ - model._randomizeActivities and \ - not randomize: - message('*!*!*!*!*!*!*!*!*!*!*!*!*!*!*!*!*!*!*!*') - message('****** WARNING: Random model being screened with non-random data.') - message('*!*!*!*!*!*!*!*!*!*!*!*!*!*!*!*!*!*!*!*') - - trainIdx, testIdx = PrepareDataFromDetails(model, details, tmpD, verbose=1) - screenResults[modelIdx] = CollectResults(testIdx, tmpD, model, - errorEstimate=details.errorEstimate) - dataSets[modelIdx] = testIdx - for tol in details.screenVoteTol: - if len(details.screenVoteTol) > 1: - message('\n-----*****-----*****-----*****-----*****-----*****-----*****-----\n') - message('Tolerance: %f' % tol) - nGood = numpy.zeros(nModels, float) - nBad = numpy.zeros(nModels, float) - nSkip = numpy.zeros(nModels, float) - confGood = numpy.zeros(nModels, float) - confBad = numpy.zeros(nModels, float) - confSkip = numpy.zeros(nModels, float) - if details.enrichTgt >= 0: - enrichments = numpy.zeros(nModels, float) - goodVoteDict = {} - badVoteDict = {} - noVoteDict = {} - voteTab = None - for modelIdx in range(nModels): - model = models[modelIdx] - model.SetInputOrder(descNames) - testIdx = dataSets[modelIdx] - screenRes = screenResults[modelIdx] - if not details.detailedScreen: - g, b, s, aG, aB, aS, vT = ScreenIt(model, testIdx, tmpD, details.partialVote, tol, - verbose=details.verbose, screenResults=screenRes) - else: - if model.GetActivityQuantBounds(): - nRes = len(model.GetActivityQuantBounds()) + 1 - else: - nRes = model.GetQuantBounds()[1][-1] - badVotes = [] - noVotes = [] - if (hasattr(details, 'showAll') and details.showAll) or \ - (hasattr(details, 'predPlot') and details.predPlot): - goodVotes = [] - else: - goodVotes = None - g, b, s, aG, aB, aS, vT = ShowVoteResults(testIdx, tmpD, model, nRes, tol, - verbose=details.verbose, - screenResults=screenRes, badVotes=badVotes, - noVotes=noVotes, goodVotes=goodVotes, - errorEstimate=details.errorEstimate) - if voteTab is None: - voteTab = numpy.zeros(vT.shape, float) - if details.errorAnalysis: - for a, p, c, idx in badVotes: - label = testIdx[idx] - if hasattr(details, 'enrichTgt') and details.enrichTgt >= 0: - if a == details.enrichTgt: - badVoteDict[label] = badVoteDict.get(label, 0) + 1 - else: - badVoteDict[label] = badVoteDict.get(label, 0) + 1 - for a, p, c, idx in noVotes: - label = testIdx[idx] - if hasattr(details, 'enrichTgt') and details.enrichTgt >= 0: - if a == details.enrichTgt: - noVoteDict[label] = noVoteDict.get(label, 0) + 1 - else: - noVoteDict[label] = noVoteDict.get(label, 0) + 1 - - if hasattr(details, 'showAll') and details.showAll: - for a, p, c, idx in goodVotes: - label = testIdx[idx] - if details.enrichTgt >= 0: - if a == details.enrichTgt: - goodVoteDict[label] = goodVoteDict.get(label, 0) + 1 - else: - goodVoteDict[label] = goodVoteDict.get(label, 0) + 1 - - if details.enrichTgt > -1: - enrichments[modelIdx] = CalcEnrichment(vT, tgt=details.enrichTgt) - - voteTab += vT - if details.detailedScreen and hasattr(details, 'predPlot') and details.predPlot: - MakePredPlot(details, testIdx, tmpD, goodVotes, badVotes, nRes, verbose=1) - - if hasattr(details, 'showAll') and details.showAll: - print('-v-v-v-v-v-v-v- All Votes -v-v-v-v-v-v-v-') - print('id, prediction, confidence, flag(-1=skipped,0=wrong,1=correct)') - for ans, pred, conf, idx in goodVotes: - pt = tmpD[testIdx[idx]] - assert model.GetActivityQuantBounds() or pt[-1] == ans, 'bad point?: %s != %s' % (str( - pt[-1]), str(ans)) - print('%s, %d, %.4f, 1' % (str(pt[0]), pred, conf)) - for ans, pred, conf, idx in badVotes: - pt = tmpD[testIdx[idx]] - assert model.GetActivityQuantBounds() or pt[-1] == ans, 'bad point?: %s != %s' % (str( - pt[-1]), str(ans)) - print('%s, %d, %.4f, 0' % (str(pt[0]), pred, conf)) - for ans, pred, conf, idx in noVotes: - pt = tmpD[testIdx[idx]] - assert model.GetActivityQuantBounds() or pt[-1] == ans, 'bad point?: %s != %s' % (str( - pt[-1]), str(ans)) - print('%s, %d, %.4f, -1' % (str(pt[0]), pred, conf)) - print('-^-^-^-^-^-^-^- -^-^-^-^-^-^-^-') - - nGood[modelIdx] = g - nBad[modelIdx] = b - nSkip[modelIdx] = s - confGood[modelIdx] = aG - confBad[modelIdx] = aB - confSkip[modelIdx] = aS - print() - - if nModels > 1: - print('-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*') - print('AVERAGES:') - - avgNBad = sum(nBad) / nModels - devNBad = numpy.sqrt(sum((nBad - avgNBad)**2) / (nModels - 1)) - - bestIdx = numpy.argsort(nBad)[0] - - avgNGood = sum(nGood) / nModels - devNGood = numpy.sqrt(sum((nGood - avgNGood)**2) / (nModels - 1)) - - avgNSkip = sum(nSkip) / nModels - devNSkip = numpy.sqrt(sum((nSkip - avgNSkip)**2) / (nModels - 1)) - - avgConfBad = sum(confBad) / nModels - devConfBad = numpy.sqrt(sum((confBad - avgConfBad)**2) / (nModels - 1)) - - avgConfGood = sum(confGood) / nModels - devConfGood = numpy.sqrt(sum((confGood - avgConfGood)**2) / (nModels - 1)) - - avgConfSkip = sum(confSkip) / nModels - devConfSkip = numpy.sqrt(sum((confSkip - avgConfSkip)**2) / (nModels - 1)) - - nClassified = avgNGood + avgNBad - nExamples = nClassified + avgNSkip - print('Misclassifications: \t%%%5.2f(%%%5.2f) %4.1f(%4.1f) / %d' % - (100 * avgNBad / nExamples, 100 * devNBad / nExamples, avgNBad, devNBad, nExamples)) - if avgNSkip > 0: - print('\tthreshold: \t%%%5.2f(%%%5.2f) %4.1f(%4.1f) / %d' % - (100 * avgNBad / nClassified, 100 * devNBad / nClassified, avgNBad, devNBad, - nClassified)) - print() - print('Number Skipped: %%%4.2f(%%%4.2f) %4.2f(%4.2f)' % - (100 * avgNSkip / nExamples, 100 * devNSkip / nExamples, avgNSkip, devNSkip)) - - print() - print('Confidences:') - print('\tCorrect: \t%4.2f(%4.2f)' % (100 * avgConfGood, 100 * devConfGood)) - print('\tIncorrect: \t%4.2f(%4.2f)' % (100 * avgConfBad, 100 * devConfBad)) - if avgNSkip > 0: - print('\tSkipped: \t%4.2f(%4.2f)' % (100 * avgConfSkip, 100 * devConfSkip)) - - if details.detailedScreen: - message('Results Table:') - voteTab = numpy.transpose(voteTab) / nModels - nResultCodes = len(voteTab) - colCounts = numpy.sum(voteTab, 0) - rowCounts = numpy.sum(voteTab, 1) - print() - for i in range(nResultCodes): - if rowCounts[i] == 0: - rowCounts[i] = 1 - row = voteTab[i] - message(' ', noRet=1) - for j in range(nResultCodes): - entry = row[j] - message(' % 6.2f' % entry, noRet=1) - message(' | % 4.2f' % (100. * voteTab[i, i] / rowCounts[i])) - message(' ', noRet=1) - for i in range(nResultCodes): - message('-------', noRet=1) - message('') - message(' ', noRet=1) - for i in range(nResultCodes): - if colCounts[i] == 0: - colCounts[i] = 1 - message(' % 6.2f' % (100. * voteTab[i, i] / colCounts[i]), noRet=1) - message('') - if details.enrichTgt > -1: - mean = sum(enrichments) / nModels - enrichments -= mean - dev = numpy.sqrt(sum(enrichments * enrichments)) / (nModels - 1) - message(' Enrichment of value %d: %.4f (%.4f)' % (details.enrichTgt, mean, dev)) - else: - bestIdx = 0 - print('------------------------------------------------') - print('Best Model: ', bestIdx + 1) - bestBad = nBad[bestIdx] - bestGood = nGood[bestIdx] - bestSkip = nSkip[bestIdx] - nClassified = bestGood + bestBad - nExamples = nClassified + bestSkip - print('Misclassifications: \t%%%5.2f %d / %d' % - (100 * bestBad / nExamples, bestBad, nExamples)) - if bestSkip > 0: - print('\tthreshold: \t%%%5.2f %d / %d' % - (100 * bestBad / nClassified, bestBad, nClassified)) - print() - print('Number Skipped: %%%4.2f %d' % (100 * bestSkip / nExamples, bestSkip)) - - print() - print('Confidences:') - print('\tCorrect: \t%4.2f' % (100 * confGood[bestIdx])) - print('\tIncorrect: \t%4.2f' % (100 * confBad[bestIdx])) - if bestSkip > 0: - print('\tSkipped: \t%4.2f' % (100 * confSkip[bestIdx])) - - if nModels == 1 and details.detailedScreen: - message('') - message('Results Table:') - voteTab = numpy.transpose(vT) - nResultCodes = len(vT) - colCounts = numpy.sum(voteTab, 0) - rowCounts = numpy.sum(voteTab, 1) - message('') - for i in range(nResultCodes): - if rowCounts[i] == 0: - rowCounts[i] = 1 - row = voteTab[i] - message(' ', noRet=1) - for j in range(nResultCodes): - entry = row[j] - message(' % 6.2f' % entry, noRet=1) - message(' | % 4.2f' % (100. * voteTab[i, i] / rowCounts[i])) - message(' ', noRet=1) - for i in range(nResultCodes): - message('-------', noRet=1) - message('') - message(' ', noRet=1) - for i in range(nResultCodes): - if colCounts[i] == 0: - colCounts[i] = 1 - message(' % 6.2f' % (100. * voteTab[i, i] / colCounts[i]), noRet=1) - message('') - if details.errorAnalysis: - message('\n*-*-*-*-*-*-*-*- ERROR ANALYSIS -*-*-*-*-*-*-*-*\n') - ks = badVoteDict.keys() - if len(ks): - message(' ---> Bad Vote Counts') - ks = noVoteDict.keys() - if len(ks): - message(' ---> Skipped Compound Counts') - for k in ks: - pt = data[k] - message('%s,%d' % (str(pt[0]), noVoteDict[k])) - - if hasattr(details, 'showAll') and details.showAll: - ks = goodVoteDict.keys() - if len(ks): - message(' ---> Good Vote Counts') - for k in ks: - pt = data[k] - message('%s,%d' % (str(pt[0]), goodVoteDict[k])) diff --git a/rdkit/ML/UnitTestAnalyzeComposite.py b/rdkit/ML/UnitTestAnalyzeComposite.py deleted file mode 100644 index 379e6e4c7a0..00000000000 --- a/rdkit/ML/UnitTestAnalyzeComposite.py +++ /dev/null @@ -1,61 +0,0 @@ -# $Id$ -# -# Copyright (C) 2004-2006 Greg Landrum and Rational Discovery LLC -# -# @@ All Rights Reserved @@ -# This file is part of the RDKit. -# The contents are covered by the terms of the BSD license -# which is included in the file license.txt, found at the root -# of the RDKit source tree. -# -"""unit testing code for the AnalyzeComposite functionality - -""" -import os -import pickle -import unittest - -from rdkit import RDConfig -from rdkit.ML import AnalyzeComposite - - -class TestCase(unittest.TestCase): - - def setUp(self): - self.baseDir = os.path.join(RDConfig.RDCodeDir, 'ML', 'test_data') - - def test1_Issue163(self): - name1 = os.path.join(self.baseDir, 'humanoral.1.pkl') - try: - with open(name1, 'rb') as pklF: - c1 = pickle.load(pklF) - except Exception: - c1 = None - self.assertTrue(c1) - name2 = os.path.join(self.baseDir, 'humanoral.2.pkl') - try: - with open(name2, 'rb') as pklF: - c2 = pickle.load(pklF) - except Exception: - c2 = None - self.assertTrue(c2) - - try: - res = sorted(AnalyzeComposite.ProcessIt([c1, c2], verbose=-1)) - except Exception: - import traceback - traceback.print_exc() - ok = 0 - else: - ok = 1 - self.assertTrue(ok) - - self.assertEqual(res[0][0], 'BALABANJ') - self.assertEqual(res[1][0], 'BERTZCT') - self.assertEqual(res[-1][0], 'VSA_ESTATE9') - for entry in res: - self.assertEqual(len(entry), 5) - - -if __name__ == '__main__': # pragma: nocover - unittest.main() diff --git a/rdkit/ML/UnitTestBuildComposite.py b/rdkit/ML/UnitTestBuildComposite.py deleted file mode 100644 index d6868ef9620..00000000000 --- a/rdkit/ML/UnitTestBuildComposite.py +++ /dev/null @@ -1,210 +0,0 @@ -# $Id$ -# -# Copyright (C) 2003-2008 greg Landrum and Rational Discovery LLC -# -# @@ All Rights Reserved @@ -# This file is part of the RDKit. -# The contents are covered by the terms of the BSD license -# which is included in the file license.txt, found at the root -# of the RDKit source tree. -# -"""unit testing code for the BuildComposite functionality - -""" -import io -import os -import pickle -import unittest - -from rdkit import RDConfig -from rdkit.Dbase.DbConnection import DbConnect -from rdkit.ML import BuildComposite - - -class TestCase(unittest.TestCase): - - def setUp(self): - self.baseDir = os.path.join(RDConfig.RDCodeDir, 'ML', 'test_data') - self.dbName = RDConfig.RDTestDatabase - - self.details = BuildComposite.SetDefaults() - self.details.dbName = self.dbName - self.details.dbUser = RDConfig.defaultDBUser - self.details.dbPassword = RDConfig.defaultDBPassword - - def _init(self, refCompos, copyBounds=0): - BuildComposite._verbose = 0 - conn = DbConnect(self.details.dbName, self.details.tableName) - cols = [x.upper() for x in conn.GetColumnNames()] - cDescs = [x.upper() for x in refCompos.GetDescriptorNames()] - self.assertEqual(cols, cDescs) - - self.details.nModels = 10 - self.details.lockRandom = 1 - self.details.randomSeed = refCompos._randomSeed - self.details.splitFrac = refCompos._splitFrac - - if self.details.splitFrac: - self.details.splitRun = 1 - else: - self.details.splitRun = 0 - - if not copyBounds: - self.details.qBounds = [0] * len(cols) - else: - self.details.qBounds = refCompos.GetQuantBounds()[0] - - def compare(self, compos, refCompos): - self.assertEqual(len(compos), len(refCompos)) - cs = [] - rcs = [] - for i in range(len(compos)): - cs.append(compos[i]) - rcs.append(refCompos[i]) - - cs.sort(key=lambda x: (x[2], x[2])) - rcs.sort(key=lambda x: (x[2], x[2])) - - for i in range(len(compos)): - _, count, err = cs[i] - _, refCount, refErr = rcs[i] - self.assertEqual(count, refCount) - self.assertAlmostEqual(err, refErr, 4) - - def test1_basics(self): - # """ basics """ - self.details.tableName = 'ferro_quant' - refComposName = 'ferromag_quant_10.pkl' - - with open(os.path.join(self.baseDir, refComposName), 'r') as pklTF: - buf = pklTF.read().replace('\r\n', '\n').encode('utf-8') - pklTF.close() - with io.BytesIO(buf) as pklF: - refCompos = pickle.load(pklF) - - # first make sure the data are intact - self._init(refCompos) - compos = BuildComposite.RunIt(self.details, saveIt=0) - - # pickle.dump(compos,open(os.path.join(self.baseDir,refComposName), 'wb')) - # with open(os.path.join(self.baseDir,refComposName), 'rb') as pklF: - # refCompos = pickle.load(pklF) - - self.compare(compos, refCompos) - - def test2_depth_limit(self): - # """ depth limit """ - self.details.tableName = 'ferro_quant' - refComposName = 'ferromag_quant_10_3.pkl' - - with open(os.path.join(self.baseDir, refComposName), 'r') as pklTF: - buf = pklTF.read().replace('\r\n', '\n').encode('utf-8') - pklTF.close() - with io.BytesIO(buf) as pklF: - refCompos = pickle.load(pklF) - - # first make sure the data are intact - self._init(refCompos) - self.details.limitDepth = 3 - compos = BuildComposite.RunIt(self.details, saveIt=0) - - self.compare(compos, refCompos) - - def test3_depth_limit_less_greedy(self): - # """ depth limit + less greedy """ - self.details.tableName = 'ferro_quant' - refComposName = 'ferromag_quant_10_3_lessgreedy.pkl' - - with open(os.path.join(self.baseDir, refComposName), 'r') as pklTF: - buf = pklTF.read().replace('\r\n', '\n').encode('utf-8') - pklTF.close() - with io.BytesIO(buf) as pklF: - refCompos = pickle.load(pklF) - - # first make sure the data are intact - self._init(refCompos) - self.details.limitDepth = 3 - self.details.lessGreedy = 1 - compos = BuildComposite.RunIt(self.details, saveIt=0) - - self.compare(compos, refCompos) - - def _test4_more_trees(self): - # """ more trees """ - self.details.tableName = 'ferro_quant' - refComposName = 'ferromag_quant_50_3.pkl' - - with open(os.path.join(self.baseDir, refComposName), 'r') as pklTF: - buf = pklTF.read().replace('\r\n', '\n').encode('utf-8') - pklTF.close() - with io.BytesIO(buf) as pklF: - refCompos = pickle.load(pklF) - - # first make sure the data are intact - self._init(refCompos) - self.details.limitDepth = 3 - self.details.nModels = 50 - compos = BuildComposite.RunIt(self.details, saveIt=0) - - self.compare(compos, refCompos) - - def test5_auto_bounds(self): - # """ auto bounds """ - self.details.tableName = 'ferro_noquant' - refComposName = 'ferromag_auto_10_3.pkl' - - with open(os.path.join(self.baseDir, refComposName), 'r') as pklTF: - buf = pklTF.read().replace('\r\n', '\n').encode('utf-8') - pklTF.close() - with io.BytesIO(buf) as pklF: - refCompos = pickle.load(pklF) - - # first make sure the data are intact - self._init(refCompos, copyBounds=1) - self.details.limitDepth = 3 - self.details.nModels = 10 - compos = BuildComposite.RunIt(self.details, saveIt=0) - - self.compare(compos, refCompos) - - def test6_auto_bounds_real_activity(self): - # """ auto bounds with a real valued activity""" - self.details.tableName = 'ferro_noquant_realact' - refComposName = 'ferromag_auto_10_3.pkl' - - with open(os.path.join(self.baseDir, refComposName), 'r') as pklTF: - buf = pklTF.read().replace('\r\n', '\n').encode('utf-8') - pklTF.close() - with io.BytesIO(buf) as pklF: - refCompos = pickle.load(pklF) - - # first make sure the data are intact - self._init(refCompos, copyBounds=1) - self.details.limitDepth = 3 - self.details.nModels = 10 - self.details.activityBounds = [0.5] - compos = BuildComposite.RunIt(self.details, saveIt=0) - - self.compare(compos, refCompos) - - def test7_composite_naiveBayes(self): - # """ Test composite of naive bayes""" - self.details.tableName = 'ferro_noquant' - refComposName = 'ferromag_NaiveBayes.pkl' - with open(os.path.join(self.baseDir, refComposName), 'r') as pklTFile: - buf = pklTFile.read().replace('\r\n', '\n').encode('utf-8') - pklTFile.close() - with io.BytesIO(buf) as pklFile: - refCompos = pickle.load(pklFile) - self._init(refCompos, copyBounds=1) - self.details.useTrees = 0 - self.details.useNaiveBayes = 1 - self.details.mEstimateVal = 20.0 - self.details.qBounds = [0] + [2] * 6 + [0] - compos = BuildComposite.RunIt(self.details, saveIt=0) - - self.compare(compos, refCompos) - - -if __name__ == '__main__': # pragma: nocover - unittest.main() diff --git a/rdkit/ML/UnitTestScreenComposite.py b/rdkit/ML/UnitTestScreenComposite.py deleted file mode 100644 index f05eaac4cba..00000000000 --- a/rdkit/ML/UnitTestScreenComposite.py +++ /dev/null @@ -1,349 +0,0 @@ -# $Id$ -# -# Copyright (C) 2003-2008 greg Landrum and Rational Discovery LLC -# -# @@ All Rights Reserved @@ -# This file is part of the RDKit. -# The contents are covered by the terms of the BSD license -# which is included in the file license.txt, found at the root -# of the RDKit source tree. -# -"""unit testing code for the ScreenComposite functionality - -""" -import io -import os -import pickle -import unittest - -from rdkit import RDConfig -from rdkit.ML import ScreenComposite - - -class TestCase(unittest.TestCase): - - def setUp(self): - self.baseDir = os.path.join(RDConfig.RDCodeDir, 'ML', 'test_data') - self.dbName = RDConfig.RDTestDatabase - self.details = ScreenComposite.SetDefaults() - self.details.dbName = self.dbName - self.details.dbUser = RDConfig.defaultDBUser - self.details.dbPassword = RDConfig.defaultDBPassword - - def test1_basics(self): - # """ basics """ - self.details.tableName = 'ferro_quant' - with open(os.path.join(self.baseDir, 'ferromag_quant_10.pkl'), 'r') as pklTF: - buf = pklTF.read().replace('\r\n', '\n').encode('utf-8') - pklTF.close() - with io.BytesIO(buf) as pklF: - compos = pickle.load(pklF) - tgt = 5 - self.assertEqual(len(compos), tgt) - - nGood, misCount, nSkipped, avgGood, avgBad, avgSkip, tbl = ScreenComposite.ScreenFromDetails( - compos, self.details) - self.assertEqual(nGood, 93) - self.assertEqual(misCount, 2) - self.assertEqual(nSkipped, 0) - self.assertAlmostEqual(avgGood, .9871, 4) - self.assertAlmostEqual(avgBad, .8000, 4) - self.assertAlmostEqual(avgSkip, 0, 4) - self.assertEqual(tbl[0, 0], 54) - self.assertEqual(tbl[1, 1], 39) - self.assertEqual(tbl[0, 1], 2) - self.assertEqual(tbl[1, 0], 0) - - def test2_include_holdout(self): - # """ include holdout data only """ - self.details.tableName = 'ferro_quant' - self.details.doHoldout = 1 - self.details.doTraining = 0 - - with open(os.path.join(self.baseDir, 'ferromag_quant_10.pkl'), 'r') as pklTF: - buf = pklTF.read().replace('\r\n', '\n').encode('utf-8') - pklTF.close() - with io.BytesIO(buf) as pklF: - compos = pickle.load(pklF) - tgt = 5 - self.assertEqual(len(compos), tgt) - - nGood, misCount, nSkipped, avgGood, avgBad, avgSkip, tbl = ScreenComposite.ScreenFromDetails( - compos, self.details) - self.assertEqual(nGood, 28) - self.assertEqual(misCount, 1) - self.assertEqual(nSkipped, 0) - self.assertAlmostEqual(avgGood, .9964, 4) - self.assertAlmostEqual(avgBad, 1.000, 4) - self.assertAlmostEqual(avgSkip, 0, 4) - self.assertEqual(tbl[0, 0], 16) - self.assertEqual(tbl[1, 1], 12) - self.assertEqual(tbl[0, 1], 1) - self.assertEqual(tbl[1, 0], 0) - - def test3_include_training(self): - # """ include training data only """ - self.details.tableName = 'ferro_quant' - self.details.doHoldout = 0 - self.details.doTraining = 1 - - with open(os.path.join(self.baseDir, 'ferromag_quant_10.pkl'), 'r') as pklTF: - buf = pklTF.read().replace('\r\n', '\n').encode('utf-8') - pklTF.close() - with io.BytesIO(buf) as pklF: - compos = pickle.load(pklF) - tgt = 5 - self.assertEqual(len(compos), tgt, 'bad composite loaded: %d != %d' % (len(compos), tgt)) - - nGood, misCount, nSkipped, avgGood, avgBad, avgSkip, tbl = ScreenComposite.ScreenFromDetails( - compos, self.details) - self.assertEqual(nGood, 65) - self.assertEqual(misCount, 1) - self.assertEqual(nSkipped, 0) - self.assertAlmostEqual(avgGood, .98307, 4) - self.assertAlmostEqual(avgBad, 0.600, 4) - self.assertAlmostEqual(avgSkip, 0, 4) - self.assertEqual(tbl[0, 0], 38, tbl) - self.assertEqual(tbl[1, 1], 27) - self.assertEqual(tbl[0, 1], 1) - self.assertEqual(tbl[1, 0], 0) - - def test4_thresholding(self): - # """ include thresholding """ - self.details.tableName = 'ferro_quant' - self.details.threshold = 0.80 - self.details.doHoldout = 0 - self.details.doTraining = 0 - - with open(os.path.join(self.baseDir, 'ferromag_quant_10.pkl'), 'r') as pklTF: - buf = pklTF.read().replace('\r\n', '\n').encode('utf-8') - pklTF.close() - with io.BytesIO(buf) as pklF: - compos = pickle.load(pklF) - tgt = 5 - self.assertEqual(len(compos), tgt) - - nGood, misCount, nSkipped, avgGood, avgBad, avgSkip, tbl = ScreenComposite.ScreenFromDetails( - compos, self.details) - self.assertEqual(nGood, 91) - self.assertEqual(misCount, 1) - self.assertEqual(nSkipped, 3) - self.assertAlmostEqual(avgGood, 0.9956, 4) - self.assertAlmostEqual(avgBad, 1.000, 4) - self.assertAlmostEqual(avgSkip, 0.6000, 4) - self.assertEqual(tbl[0, 0], 54) - self.assertEqual(tbl[1, 1], 37) - self.assertEqual(tbl[0, 1], 1) - self.assertEqual(tbl[1, 0], 0) - - def test5_basics(self): - # """ basics """ - self.details.tableName = 'ferro_noquant' - - with open(os.path.join(self.baseDir, 'ferromag_auto_10_3.pkl'), 'r') as pklTF: - buf = pklTF.read().replace('\r\n', '\n').encode('utf-8') - pklTF.close() - with io.BytesIO(buf) as pklF: - compos = pickle.load(pklF) - tgt = 10 - self.assertEqual(len(compos), tgt) - - tpl = ScreenComposite.ScreenFromDetails(compos, self.details) - nGood, misCount, nSkipped, avgGood, avgBad, avgSkip, tbl = tpl - - self.assertEqual(nGood, 95) - self.assertEqual(misCount, 8) - self.assertEqual(nSkipped, 0) - self.assertAlmostEqual(avgGood, .9684, 4) - self.assertAlmostEqual(avgBad, .8375, 4) - self.assertAlmostEqual(avgSkip, 0, 4) - self.assertEqual(tbl[0, 0], 50) - self.assertEqual(tbl[1, 1], 45) - self.assertEqual(tbl[0, 1], 5) - self.assertEqual(tbl[1, 0], 3) - - def test6_multiple_models(self): - # """ multiple models """ - self.details.tableName = 'ferro_noquant' - with open(os.path.join(self.baseDir, 'ferromag_auto_10_3.pkl'), 'r') as pklTF: - buf = pklTF.read().replace('\r\n', '\n').encode('utf-8') - pklTF.close() - with io.BytesIO(buf) as pklF: - compos = pickle.load(pklF) - tgt = 10 - self.assertEqual(len(compos), tgt) - composites = [compos, compos] - tpl = ScreenComposite.ScreenFromDetails(composites, self.details) - nGood, misCount, nSkipped, avgGood, avgBad, avgSkip, tbl = tpl - self.assertEqual(nGood[0], 95) - self.assertEqual(misCount[0], 8) - self.assertEqual(nSkipped[0], 0) - self.assertAlmostEqual(avgGood[0], .9684, 4) - self.assertAlmostEqual(avgBad[0], .8375, 4) - self.assertAlmostEqual(avgSkip[0], 0.0, 4) - self.assertEqual(nGood[1], 0) - self.assertEqual(misCount[1], 0) - self.assertEqual(nSkipped[1], 0) - self.assertEqual(avgGood[1], 0) - self.assertEqual(avgBad[1], 0) - self.assertEqual(avgSkip[1], 0) - self.assertEqual(tbl[0, 0], 50) - self.assertEqual(tbl[1, 1], 45) - self.assertEqual(tbl[0, 1], 5) - self.assertEqual(tbl[1, 0], 3) - - def test7_shuffle(self): - # """ shuffle """ - self.details.tableName = 'ferro_noquant' - with open(os.path.join(self.baseDir, 'ferromag_shuffle_10_3.pkl'), 'r') as pklTF: - buf = pklTF.read().replace('\r\n', '\n').encode('utf-8') - pklTF.close() - with io.BytesIO(buf) as pklF: - compos = pickle.load(pklF) - tgt = 10 - self.assertEqual(len(compos), tgt) - self.details.shuffleActivities = 1 - nGood, misCount, nSkipped, avgGood, avgBad, avgSkip, tbl = ScreenComposite.ScreenFromDetails( - compos, self.details) - self.assertEqual(nGood, 50) - self.assertEqual(misCount, 53) - self.assertEqual(nSkipped, 0) - self.assertAlmostEqual(avgGood, .7380, 4) - self.assertAlmostEqual(avgBad, .7660, 4) - self.assertAlmostEqual(avgSkip, 0, 4) - self.assertEqual(tbl[0, 0], 30) - self.assertEqual(tbl[1, 1], 20) - self.assertEqual(tbl[0, 1], 25) - self.assertEqual(tbl[1, 0], 28) - - def test8_shuffle_segmentation(self): - # """ shuffle with segmentation """ - self.details.tableName = 'ferro_noquant' - with open(os.path.join(self.baseDir, 'ferromag_shuffle_10_3.pkl'), 'r') as pklTF: - buf = pklTF.read().replace('\r\n', '\n').encode('utf-8') - pklTF.close() - with io.BytesIO(buf) as pklF: - compos = pickle.load(pklF) - tgt = 10 - self.assertEqual(len(compos), tgt) - self.details.shuffleActivities = 1 - self.details.doHoldout = 1 - nGood, misCount, nSkipped, avgGood, avgBad, avgSkip, tbl = ScreenComposite.ScreenFromDetails( - compos, self.details) - self.assertEqual(nGood, 19) - self.assertEqual(misCount, 12) - self.assertEqual(nSkipped, 0) - self.assertAlmostEqual(avgGood, .7737, 4) - self.assertAlmostEqual(avgBad, .7500, 4) - self.assertAlmostEqual(avgSkip, 0, 4) - self.assertEqual(tbl[0, 0], 12) - self.assertEqual(tbl[1, 1], 7) - self.assertEqual(tbl[0, 1], 6) - self.assertEqual(tbl[1, 0], 6) - - def test9_shuffle_segmentation2(self): - # """ shuffle with segmentation2 """ - self.details.tableName = 'ferro_noquant' - with open(os.path.join(self.baseDir, 'ferromag_shuffle_10_3.pkl'), 'r') as pklTF: - buf = pklTF.read().replace('\r\n', '\n').encode('utf-8') - pklTF.close() - with io.BytesIO(buf) as pklF: - compos = pickle.load(pklF) - tgt = 10 - self.assertEqual(len(compos), tgt) - self.details.shuffleActivities = 1 - self.details.doTraining = 1 - nGood, misCount, nSkipped, avgGood, avgBad, avgSkip, tbl = ScreenComposite.ScreenFromDetails( - compos, self.details) - self.assertEqual(nGood, 31) - self.assertEqual(misCount, 41) - self.assertEqual(nSkipped, 0) - self.assertAlmostEqual(avgGood, .7161, 4) - self.assertAlmostEqual(avgBad, .7707, 4) - self.assertAlmostEqual(avgSkip, 0.0, 4) - self.assertEqual(tbl[0, 0], 18) - self.assertEqual(tbl[1, 1], 13) - self.assertEqual(tbl[0, 1], 19) - self.assertEqual(tbl[1, 0], 22) - - def test10_filtering(self): - # """ filtering """ - self.details.tableName = 'ferro_noquant' - with open(os.path.join(self.baseDir, 'ferromag_filt_10_3.pkl'), 'r') as pklTF: - buf = pklTF.read().replace('\r\n', '\n').encode('utf-8') - pklTF.close() - with io.BytesIO(buf) as pklF: - compos = pickle.load(pklF) - tgt = 10 - self.assertEqual(len(compos), tgt) - self.details.filterVal = 1 - self.details.filterFrac = .33 - - nGood, misCount, nSkipped, avgGood, avgBad, avgSkip, tbl = ScreenComposite.ScreenFromDetails( - compos, self.details) - self.assertEqual(nGood, 90) - self.assertEqual(misCount, 13) - self.assertEqual(nSkipped, 0) - self.assertAlmostEqual(avgGood, .9578, 4) - self.assertAlmostEqual(avgBad, .8538, 4) - self.assertAlmostEqual(avgSkip, 0, 4) - self.assertEqual(tbl[0, 0], 54) - self.assertEqual(tbl[1, 1], 36) - self.assertEqual(tbl[0, 1], 1) - self.assertEqual(tbl[1, 0], 12) - - def test11_filtering_segmentation(self): - # """ filtering with segmentation """ - self.details.tableName = 'ferro_noquant' - with open(os.path.join(self.baseDir, 'ferromag_filt_10_3.pkl'), 'r') as pklTF: - buf = pklTF.read().replace('\r\n', '\n').encode('utf-8') - pklTF.close() - with io.BytesIO(buf) as pklF: - compos = pickle.load(pklF) - tgt = 10 - self.assertEqual(len(compos), tgt) - self.details.doHoldout = 1 - self.details.filterVal = 1 - self.details.filterFrac = .33 - - nGood, misCount, nSkipped, avgGood, avgBad, avgSkip, tbl = ScreenComposite.ScreenFromDetails( - compos, self.details) - - self.assertEqual(nGood, 37) - self.assertEqual(misCount, 6) - self.assertEqual(nSkipped, 0) - self.assertAlmostEqual(avgGood, .95946, 4) - self.assertAlmostEqual(avgBad, .85, 4) - self.assertAlmostEqual(avgSkip, 0, 4) - self.assertEqual(tbl[0, 0], 14) - self.assertEqual(tbl[1, 1], 23) - self.assertEqual(tbl[0, 1], 1) - self.assertEqual(tbl[1, 0], 5) - - def test12_naiveBayes_composite(self): - # """ test the naive bayes composite""" - self.details.tableName = 'ferro_noquant' - with open(os.path.join(self.baseDir, 'ferromag_NaiveBayes.pkl'), 'r') as pklTF: - buf = pklTF.read().replace('\r\n', '\n').encode('utf-8') - pklTF.close() - with io.BytesIO(buf) as pklF: - compos = pickle.load(pklF) - tgt = 10 - self.assertEqual(len(compos), tgt) - self.details.doHoldout = 1 - nGood, misCount, nSkipped, avgGood, avgBad, avgSkip, tbl = ScreenComposite.ScreenFromDetails( - compos, self.details) - self.assertEqual(nGood, 25) - self.assertEqual(misCount, 6) - self.assertEqual(nSkipped, 0) - self.assertAlmostEqual(avgGood, 0.9800, 4) - self.assertAlmostEqual(avgBad, 0.86667, 4) - self.assertAlmostEqual(avgSkip, 0, 4) - self.assertEqual(tbl[0, 0], 9) - self.assertEqual(tbl[0, 1], 6) - self.assertEqual(tbl[1, 0], 0) - self.assertEqual(tbl[1, 1], 16) - - -if __name__ == '__main__': # pragma: nocover - unittest.main()