diff --git a/test/LearningTest/TestKhiops/Standard/BUGIrisU/IrisExtended.kdic b/test/LearningTest/TestKhiops/Standard/BUGIrisU/IrisExtended.kdic new file mode 100644 index 000000000..f264f98f0 --- /dev/null +++ b/test/LearningTest/TestKhiops/Standard/BUGIrisU/IrisExtended.kdic @@ -0,0 +1,11 @@ +#Khiops 7.7.2i + +Dictionary Iris +{ +Unused Numerical SepalLength ; + Numerical SepalWidth ; +Unused Numerical PetalLength ; +Unused Numerical PetalWidth ; + Categorical SPetalLength = AsCategorical(Floor(PetalLength)) ; +Unused Categorical Class ; +}; diff --git a/test/LearningTest/TestKhiops/Standard/BUGIrisU/results.ref/AllReports.khj b/test/LearningTest/TestKhiops/Standard/BUGIrisU/results.ref/AllReports.khj new file mode 100644 index 000000000..9b9e4dd49 --- /dev/null +++ b/test/LearningTest/TestKhiops/Standard/BUGIrisU/results.ref/AllReports.khj @@ -0,0 +1,198 @@ +{ + "tool": "Khiops", + "version": "10.2.0", + "shortDescription": "", + "preparationReport": { + "reportType": "Preparation", + "summary": { + "dictionary": "Iris", + "variables": { + "types": [ + "Categorical", + "Numerical" + ], + "numbers": [ + 1, + 1 + ] + }, + "database": "..\/..\/..\/datasets\/Iris\/Iris.txt", + "samplePercentage": 70, + "samplingMode": "Include sample", + "selectionVariable": "", + "selectionValue": "", + "instances": 105, + "learningTask": "Unsupervised analysis", + "evaluatedVariables": 2, + "nativeVariables": 1, + "constructedVariables": 1, + "featureEngineering": { + "maxNumberOfConstructedVariables": 0, + "maxNumberOfTrees": 0, + "maxNumberOfVariablePairs": 100 + }, + "discretization": "EqualWidth", + "valueGrouping": "BasicGrouping" + }, + "variablesStatistics": [ + { + "rank": "R1", + "name": "SPetalLength", + "type": "Categorical", + "values": 5, + "mode": "1", + "modeFrequency": 38, + "constructionCost": 1.38629, + "derivationRule": "AsCategorical(Floor(PetalLength))" + }, + { + "rank": "R2", + "name": "SepalWidth", + "type": "Numerical", + "values": 23, + "min": 2, + "max": 4.4, + "mean": 3.081904762, + "stdDev": 0.4284592446, + "missingNumber": 0, + "constructionCost": 1.38629 + } + ], + "variablesDetailedStatistics": { + "R1": { + "dataGrid": { + "isSupervised": false, + "dimensions": [ + { + "variable": "SPetalLength", + "type": "Categorical", + "partitionType": "Value groups", + "partition": [ + ["1"], + ["5"], + ["4"], + ["3"], + ["6"] + ], + "defaultGroupIndex": 4 + } + ], + "frequencies": [38,27,25,8,7] + }, + "inputValues": { + "values": ["1","5","4","3","6"], + "frequencies": [38,27,25,8,7] + } + }, + "R2": { + "dataGrid": { + "isSupervised": false, + "dimensions": [ + { + "variable": "SepalWidth", + "type": "Numerical", + "partitionType": "Intervals", + "partition": [ + [2,2.25], + [2.25,2.45], + [2.45,2.75], + [2.75,2.95], + [2.95,3.25], + [3.25,3.45], + [3.45,3.65], + [3.65,3.95], + [3.95,4.15], + [4.15,4.4] + ] + } + ], + "frequencies": [2,3,14,17,41,12,5,7,2,2] + } + } + } + }, + "bivariatePreparationReport": { + "reportType": "BivariatePreparation", + "summary": { + "dictionary": "Iris", + "variables": { + "types": [ + "Categorical", + "Numerical" + ], + "numbers": [ + 1, + 1 + ] + }, + "database": "..\/..\/..\/datasets\/Iris\/Iris.txt", + "samplePercentage": 70, + "samplingMode": "Include sample", + "selectionVariable": "", + "selectionValue": "", + "instances": 105, + "learningTask": "Unsupervised analysis", + "evaluatedVariablePairs": 1, + "informativeVariablePairs": 1 + }, + "variablesPairsStatistics": [ + { + "rank": "R1", + "name1": "SPetalLength", + "name2": "SepalWidth", + "level": 0.0103105, + "variables": 2, + "parts1": 3, + "parts2": 3, + "cells": 9, + "constructionCost": 3.13205, + "preparationCost": 38.9735, + "dataCost": 497.662 + } + ], + "variablesPairsDetailedStatistics": { + "R1": { + "dataGrid": { + "isSupervised": false, + "dimensions": [ + { + "variable": "SPetalLength", + "type": "Categorical", + "partitionType": "Value groups", + "partition": [ + ["4"], + ["1"], + ["5"] + ], + "defaultGroupIndex": 0 + }, + { + "variable": "SepalWidth", + "type": "Numerical", + "partitionType": "Intervals", + "partition": [ + [2,2.95], + [2.95,3.25], + [3.25,4.4] + ] + } + ], + "cellIds": ["C1","C2","C3","C4","C5","C6","C7","C8","C9"], + "cellPartIndexes": [ + [0,0], + [1,0], + [2,0], + [0,1], + [1,1], + [2,1], + [0,2], + [1,2], + [2,2] + ], + "cellFrequencies": [26,1,9,10,15,16,4,22,2] + } + } + } + }, + "khiops_encoding": "ascii" +} diff --git a/test/LearningTest/TestKhiops/Standard/BUGIrisU/results.ref/PreparationReport.xls b/test/LearningTest/TestKhiops/Standard/BUGIrisU/results.ref/PreparationReport.xls new file mode 100644 index 000000000..8d32e583f --- /dev/null +++ b/test/LearningTest/TestKhiops/Standard/BUGIrisU/results.ref/PreparationReport.xls @@ -0,0 +1,87 @@ +#Khiops 10.2.0 +Descriptive statistics + + +Problem description +Short description + +Dictionary Iris +Variables + Categorical 1 + Numerical 1 + Total 2 + +Database ../../../datasets/Iris/Iris.txt +Sample percentage 70 +Sampling mode Include sample +Selection variable +Selection value +Instances 105 + +Learning task Unsupervised analysis + +Evaluated variables 2 +Native variables 1 +Constructed variables 1 + +Max number of constructed variables 0 +Max number of trees 0 +Max number of variable pairs 100 +Discretization EqualWidth +Value grouping BasicGrouping + + +Categorical variables statistics + +Rank Name Values Mode Mode coverage Constr. cost Derivation rule +R1 SPetalLength 5 1 0.361905 1.38629 AsCategorical(Floor(PetalLength)) + + +Numerical variables statistics + +Rank Name Values Min Max Mean Std dev Missing number Constr. cost Derivation rule +R2 SepalWidth 23 2 4.4 3.081904762 0.4284592446 0 1.38629 + + +-------------------------------------------------------------------------------- + +Variables detailed statistics + + +Rank R1 +Variable Categorical SPetalLength + +Variable stats +Group Frequency Coverage Size Value list +{1} 38 0.361905 1 1 +{5} 27 0.257143 1 5 +{4} 25 0.238095 1 4 +{3} 8 0.0761905 1 3 +{6} 7 0.0666667 1 6 * + + +Values +Value Frequency Coverage +1 38 0.361905 +5 27 0.257143 +4 25 0.238095 +3 8 0.0761905 +6 7 0.0666667 + +---------------------------------------------- +Rank R2 +Variable Numerical SepalWidth + +Variable stats +Interval Frequency Coverage +]-inf;2.25] 2 0.0190476 +]2.25;2.45] 3 0.0285714 +]2.45;2.75] 14 0.133333 +]2.75;2.95] 17 0.161905 +]2.95;3.25] 41 0.390476 +]3.25;3.45] 12 0.114286 +]3.45;3.65] 5 0.047619 +]3.65;3.95] 7 0.0666667 +]3.95;4.15] 2 0.0190476 +]4.15;+inf[ 2 0.0190476 + diff --git a/test/LearningTest/TestKhiops/Standard/BUGIrisU/results.ref/PreparationReport2D.xls b/test/LearningTest/TestKhiops/Standard/BUGIrisU/results.ref/PreparationReport2D.xls new file mode 100644 index 000000000..18ab5e43f --- /dev/null +++ b/test/LearningTest/TestKhiops/Standard/BUGIrisU/results.ref/PreparationReport2D.xls @@ -0,0 +1,77 @@ +#Khiops 10.2.0 +Descriptive statistics + + +Problem description +Short description + +Dictionary Iris +Variables + Categorical 1 + Numerical 1 + Total 2 + +Database ../../../datasets/Iris/Iris.txt +Sample percentage 70 +Sampling mode Include sample +Selection variable +Selection value +Instances 105 + +Learning task Unsupervised analysis + +Evaluated variable pairs 1 +Informative variable pairs 1 + + +Variables pairs statistics + +Rank Name 1 Name 2 Level Variables Parts 1 Parts 2 Cells Constr. cost Prep. cost Data cost +R1 SPetalLength SepalWidth 0.0103105 2 3 3 9 3.13205 38.9735 497.662 + + +-------------------------------------------------------------------------------- + +Variables pairs detailed statistics +(Pairs with two jointly informative variables) + + +Rank R1 +Variables + Type Name + Categorical SPetalLength + Numerical SepalWidth + +Variables stats +SPetalLength +Group Frequency Coverage Size Value list +{4} 40 0.380952 3 4 * +{1} 38 0.361905 1 1 +{5} 27 0.257143 1 5 +SepalWidth +Interval Frequency Coverage +]-inf;2.95] 36 0.342857 +]2.95;3.25] 41 0.390476 +]3.25;+inf[ 28 0.266667 + +Cell frequencies + SepalWidth +SPetalLength ]-inf;2.95] ]2.95;3.25] ]3.25;+inf[ Total Coverage +{4} 26 10 4 40 0.380952 +{1} 1 15 22 38 0.361905 +{5} 9 16 2 27 0.257143 +Total 36 41 28 105 +Coverage 0.342857 0.390476 0.266667 + +Cells 9 +Cell Id SPetalLength SepalWidth Frequency Coverage +C1 {4} ]-inf;2.95] 26 0.247619 +C8 {1} ]3.25;+inf[ 22 0.209524 +C6 {5} ]2.95;3.25] 16 0.152381 +C5 {1} ]2.95;3.25] 15 0.142857 +C4 {4} ]2.95;3.25] 10 0.0952381 +C3 {5} ]-inf;2.95] 9 0.0857143 +C7 {4} ]3.25;+inf[ 4 0.0380952 +C9 {5} ]3.25;+inf[ 2 0.0190476 +C2 {1} ]-inf;2.95] 1 0.00952381 + Total 105 1 diff --git a/test/LearningTest/TestKhiops/Standard/BUGIrisU/results.ref/err.txt b/test/LearningTest/TestKhiops/Standard/BUGIrisU/results.ref/err.txt new file mode 100644 index 000000000..6c4f3d81d --- /dev/null +++ b/test/LearningTest/TestKhiops/Standard/BUGIrisU/results.ref/err.txt @@ -0,0 +1,6 @@ +Train unsupervised model +Database ../../../datasets/Iris/Iris.txt: Read records: 150 Selected records: 105 +Evaluation of variable pairs +Data preparation time: 0:00:00.07 +Write report ./results\AllReports.khj + diff --git a/test/LearningTest/TestKhiops/Standard/BUGIrisU/results.ref/time.log b/test/LearningTest/TestKhiops/Standard/BUGIrisU/results.ref/time.log new file mode 100644 index 000000000..514a4afeb --- /dev/null +++ b/test/LearningTest/TestKhiops/Standard/BUGIrisU/results.ref/time.log @@ -0,0 +1 @@ +Overal time: 0.4408118724822998 diff --git a/test/LearningTest/TestKhiops/Standard/BUGIrisU/test.prm b/test/LearningTest/TestKhiops/Standard/BUGIrisU/test.prm new file mode 100644 index 000000000..98fcd668b --- /dev/null +++ b/test/LearningTest/TestKhiops/Standard/BUGIrisU/test.prm @@ -0,0 +1,54 @@ +// Thu Jun 12 13:31:06 2008 +// modl +// Output command file +// +//This file contains recorded commands, that can be replayed. +//Commands are based on user interactions: +// field update +// list index selection +// menu action +//Every command can be commented, using //. +//For example, commenting the last Exit command will allow other +//user interactions, after the commands have been replayed. +// +// + +// -> Data preparation and scoring +ClassManagement.OpenFile // Open... + +// -> Open +ClassFileName ../../../datasets/Iris/Iris.kdic // Dictionary file +ClassFileName ./IrisExtended.kdic // Dictionary file +OK // Open +// <- Open + +ClassManagement.ClassName Iris // Dictionary + +TrainDatabase.DatabaseFiles.List.Key Iris // List item selection +TrainDatabase.DatabaseFiles.DataTableName ../../../datasets/Iris/Iris.txt // Database file +TrainDatabase.SampleNumberPercentage 70 // Sample percentage + + +AnalysisSpec.TargetAttributeName // Target variable + +AnalysisSpec.PredictorsSpec.SelectiveNaiveBayesPredictor false // Predicteur Bayesien Naif Selectif + +AnalysisSpec.PredictorsSpec.ConstructionSpec.MaxAttributePairNumber 100 // Max number of pairs of variables + +AnalysisResults.ResultFilesDirectory ./results // Result files directory +AnalysisResults.Preparation2DFileName PreparationReport2D.xls // Preparation report + + + +AnalysisSpec.PredictorsSpec.ConstructionSpec.MaxTreeNumber 0 // Max Tree number +AnalysisSpec.PredictorsSpec.ConstructionSpec.MaxConstructedAttributeNumber 0 // Max number of constructed variables +ComputeStats // Analyse database + +Exit // Close +// <- Data preparation and scoring + + +// -> Data preparation and scoring +OK // Close +// <- Data preparation and scoring +