From 44c30c9667afb22555eb1fdde944511f2d0d8bfc Mon Sep 17 00:00:00 2001 From: taniya-das Date: Thu, 25 Jul 2024 16:43:04 +0200 Subject: [PATCH] Integration of structured query and database filtering --- backend/config.json | 5 +- data/attribute_info.json | 2 +- frontend/ui.py | 19 +- frontend/ui_utils.py | 34 +- start_local.sh | 5 +- start_structured_query_service.sh | 19 + structured_query/__init__.py | 0 structured_query/chroma_store_utilis.py | 44 + .../llm_service_structured_query.py | 15 +- .../test_chroma_db_from_pandas.ipynb | 316 ++-- .../{selfquery.ipynb => test_selfquery.ipynb} | 86 +- .../test_strutured_query_pipeline.ipynb | 1623 +++++++++++++++++ 12 files changed, 1924 insertions(+), 244 deletions(-) create mode 100755 start_structured_query_service.sh create mode 100644 structured_query/__init__.py create mode 100644 structured_query/chroma_store_utilis.py rename useful_scripts/{selfquery.ipynb => test_selfquery.ipynb} (97%) create mode 100644 useful_scripts/test_strutured_query_pipeline.ipynb diff --git a/backend/config.json b/backend/config.json index 0b11c1c..6108d08 100644 --- a/backend/config.json +++ b/backend/config.json @@ -17,6 +17,7 @@ "search_type" : "similarity", "reranking" : false, "long_context_reorder" : false, - "structure_query": false, - "use_chroma_for_saving_metadata": false + "structured_query": false, + "use_chroma_for_saving_metadata": false, + "chroma_metadata_dir": "../data/chroma_db_metadata" } \ No newline at end of file diff --git a/data/attribute_info.json b/data/attribute_info.json index 51fee8d..9c2236d 100644 --- a/data/attribute_info.json +++ b/data/attribute_info.json @@ -1 +1 @@ -[{"name": "Unnamed: 0", "description": "Unique identifier", "type": "Numeric"}, {"name": "did", "description": "Dataset ID", "type": "Numeric"}, {"name": "name", "description": "Name of the dataset", "type": "String"}, {"name": "version", "description": "Version of this dataset. '1' for original version. Auto-incremented by server.", "type": "Numeric"}, {"name": "uploader", "description": "ID of the uploader", "type": "Numeric"}, {"name": "status", "description": "Current status of the dataset. Whether the dataset is active.", "type": "String"}, {"name": "format", "description": "Format of the dataset. Example - arff format ", "type": "String"}, {"name": "MajorityClassSize", "description": "Number of instances belonging to the most frequent class.", "type": "Numeric"}, {"name": "MaxNominalAttDistinctValues", "description": "The maximum number of distinct values among attributes of the nominal type.", "type": "Numeric"}, {"name": "MinorityClassSize", "description": "Number of instances belonging to the least frequent class.", "type": "Numeric"}, {"name": "NumberOfClasses", "description": "Number of classes in the dataset. 2.0 for binary classification, and more than 2.0 for multi-class classification.", "type": "Numeric"}, {"name": "NumberOfFeatures", "description": "Number of features or attributes in the dataset.", "type": "Numeric"}, {"name": "NumberOfInstances", "description": "Number of instances in the dataset", "type": "Numeric"}, {"name": "NumberOfInstancesWithMissingValues", "description": "Number of instances with missing values in the dataset", "type": "Numeric"}, {"name": "NumberOfMissingValues", "description": "Number of missing values in the dataset", "type": "Numeric"}, {"name": "NumberOfNumericFeatures", "description": "Number of numeric features in the dataset", "type": "Numeric"}, {"name": "NumberOfSymbolicFeatures", "description": "Number of symbolic features in the dataset", "type": "Numeric"}, {"name": "description", "description": "Description of the dataset", "type": "String"}, {"name": "qualities", "description": "Qualities of the dataset", "type": "String"}, {"name": "features", "description": "Features of the dataset", "type": "String"}, {"name": "Combined_information", "description": "Combine information from all the coulmns in the dataset.", "type": "String"}] \ No newline at end of file +[{"name": "Unnamed: 0", "description": "Unique identifier", "type": "Numeric"}, {"name": "did", "description": "Dataset ID", "type": "Numeric"}, {"name": "name", "description": "Name of the dataset", "type": "String"}, {"name": "version", "description": "Version of this dataset. '1' for original version. Auto-incremented by server.", "type": "Numeric"}, {"name": "uploader", "description": "ID of the uploader", "type": "Numeric"}, {"name": "status", "description": "Current status of the dataset. Whether the dataset is active.", "type": "String"}, {"name": "format", "description": "Format of the dataset. Example - arff format ", "type": "String"}, {"name": "MajorityClassSize", "description": "Number of instances belonging to the most frequent class.", "type": "Numeric"}, {"name": "MaxNominalAttDistinctValues", "description": "The maximum number of distinct values among attributes of the nominal type.", "type": "Numeric"}, {"name": "MinorityClassSize", "description": "Number of instances belonging to the least frequent class.", "type": "Numeric"}, {"name": "NumberOfClasses", "description": "Number of classes in the dataset. 2.0 for binary classification, and more than 2.0 for multi-class classification.", "type": "Float"}, {"name": "NumberOfFeatures", "description": "Number of features or attributes in the dataset.", "type": "Numeric"}, {"name": "NumberOfInstances", "description": "Number of instances in the dataset", "type": "Numeric"}, {"name": "NumberOfInstancesWithMissingValues", "description": "Number of instances with missing values in the dataset", "type": "Numeric"}, {"name": "NumberOfMissingValues", "description": "Number of missing values in the dataset", "type": "Numeric"}, {"name": "NumberOfNumericFeatures", "description": "Number of numeric features in the dataset", "type": "Numeric"}, {"name": "NumberOfSymbolicFeatures", "description": "Number of symbolic features in the dataset", "type": "Numeric"}, {"name": "description", "description": "Description of the dataset", "type": "String"}, {"name": "qualities", "description": "Qualities of the dataset", "type": "String"}, {"name": "features", "description": "Features of the dataset", "type": "String"}, {"name": "Combined_information", "description": "Combine information from all the coulmns in the dataset.", "type": "String"}] \ No newline at end of file diff --git a/frontend/ui.py b/frontend/ui.py index 63a6227..8540ba3 100644 --- a/frontend/ui.py +++ b/frontend/ui.py @@ -9,6 +9,13 @@ with open("../backend/config.json", "r") as file: config = json.load(file) +# Load metadata chroma database +if config['structure_query']: + import sys + sys.path.append('../') + from structured_query.chroma_store_utilis import * + collec = load_chroma_metadata() + # Metadata paths data_metadata_path = Path(config["data_dir"]) / "all_dataset_description.csv" flow_metadata_path = Path(config["data_dir"]) / "all_flow_description.csv" @@ -26,28 +33,30 @@ st.session_state["query"] = query st.session_state["query_type"] = query_type - # Submit button logic if st.button("Submit"): - response_parser = ResponseParser(query_type, apply_llm_before_rag=True) + response_parser = ResponseParser(query_type, apply_llm_before_rag=False) if query_type == "Dataset": with st.spinner("Waiting for results..."): if config["structure_query"] == True: # get structured query - structured_query = response_parser.fetch_structured_query( + response_parser.fetch_structured_query( query_type, query ) - st.write(structured_query) + st.write(response_parser.structured_query_response[0]) # get rag response response_parser.fetch_rag_response( - query_type, structured_query["query"] + query_type, response_parser.structured_query_response[0]["query"] ) + if response_parser.structured_query_response[1].get("filter"): + response_parser.database_filter(response_parser.structured_query_response[1]["filter"], collec) else: # get rag response response_parser.fetch_rag_response(query_type, query) # get llm response response_parser.fetch_llm_response(query) # get updated columns based on llm response + results = response_parser.parse_and_update_response(data_metadata) # display results in a table display_results(results) diff --git a/frontend/ui_utils.py b/frontend/ui_utils.py index 6f02c8d..39a8409 100644 --- a/frontend/ui_utils.py +++ b/frontend/ui_utils.py @@ -4,6 +4,7 @@ import requests import streamlit as st from streamlit import session_state as ss +from langchain_community.query_constructors.chroma import ChromaTranslator def feedback_cb(): @@ -108,6 +109,8 @@ def __init__(self, query_type, apply_llm_before_rag=False): self.rag_response = None self.llm_response = None self.apply_llm_before_rag = apply_llm_before_rag + self.database_filtered = None + self.structured_query_response = None def load_paths(self): """ @@ -148,9 +151,19 @@ def fetch_structured_query(self, query_type, query): f"{structured_response_path['local']}{query}", json={"query": query}, ).json() - print(self.structured_query_response) + return self.structured_query_response - + + def database_filter(self, filter_condition, collec): + """ + Apply database filter on the rag_response + """ + ids = list(map(str, self.rag_response['initial_response'])) + self.database_filtered = collec.get(ids = ids, where=filter_condition)['ids'] + self.database_filtered = list(map(int, self.database_filtered)) + # print(self.database_filtered) + return self.database_filtered + def fetch_rag_response(self, query_type, query): """ Description: Fetch the response from the FastAPI service @@ -198,7 +211,20 @@ def parse_and_update_response(self, metadata): return metadata elif ( self.rag_response is not None and self.structured_query_response is not None - ): - return metadata[["did", "name"]] + ): + col_name = ["status", "NumberOfClasses", "NumberOfFeatures", "NumberOfInstances"] + if self.structured_query_response[0].get("filter"): + filtered_metadata = metadata[ + metadata["did"].isin(self.database_filtered) + ] + print("Showing database filtered data") + else: + filtered_metadata = metadata[ + metadata["did"].isin(self.rag_response['initial_response']) + ] + print("Showing only rag response") + return filtered_metadata[["did", "name", *col_name]] else: return metadata + + diff --git a/start_local.sh b/start_local.sh index 6926758..5ce13af 100755 --- a/start_local.sh +++ b/start_local.sh @@ -10,7 +10,10 @@ cd ollama || exit ./get_ollama.sh & echo $! > $PID_FILE -structured_query = false +# Fetch configuration from ../backend/config.json +config_file="../backend/config.json" +structured_query=$(jq -r '.structured_query' $config_file) + if [ "$structured_query" == true ]; then cd ../structured_query || exit uvicorn llm_service_structured_query:app --host 0.0.0.0 --port 8082 & diff --git a/start_structured_query_service.sh b/start_structured_query_service.sh new file mode 100755 index 0000000..ef8b751 --- /dev/null +++ b/start_structured_query_service.sh @@ -0,0 +1,19 @@ +#!/bin/bash +poetry install +killall ollama +killall streamlit +# Define a file to store the PIDs +PID_FILE="processes.pid" + +# Start processes and save their PIDs +cd ollama +./get_ollama.sh & +echo $! > $PID_FILE + +cd ../structured_query +uvicorn llm_service_structured_query:app --host 0.0.0.0 --port 8082 & +echo $! > $PID_FILE + +cd .. +# Keep the script running to maintain the background processes +wait \ No newline at end of file diff --git a/structured_query/__init__.py b/structured_query/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/structured_query/chroma_store_utilis.py b/structured_query/chroma_store_utilis.py new file mode 100644 index 0000000..5ea16df --- /dev/null +++ b/structured_query/chroma_store_utilis.py @@ -0,0 +1,44 @@ +import sqlalchemy +import pandas as pd +import chromadb +from langchain_community.vectorstores.chroma import Chroma +from tqdm.auto import tqdm + +import sys + +sys.path.append("../") +sys.path.append("../backend/") +from backend.modules.utils import load_config_and_device + +config = load_config_and_device("../backend/config.json") + +# load the persistent database using ChromaDB +client = chromadb.PersistentClient(path=config["chroma_metadata_dir"]) + +collec = client.get_or_create_collection(name = "metadata") + +metadata_df = pd.read_csv("../data/all_dataset_description.csv") +metadata_df = metadata_df.drop(columns=["Combined_information"]) + +# Function to chunk the DataFrame +def chunk_dataframe(df, chunk_size): + for i in range(0, df.shape[0], chunk_size): + yield df.iloc[i : i + chunk_size] + +def load_chroma_metadata(): + # Define the chunk size + chunk_size = 100 # Adjust the chunk size as needed + + # Process each chunk + for chunk in tqdm( + chunk_dataframe(metadata_df, chunk_size), total=(len(metadata_df) // chunk_size) + 1 + ): + ids = chunk["did"].astype(str).tolist() + documents = chunk["description"].astype(str).tolist() + metadatas = chunk.to_dict(orient="records") + + # Add to ChromaDB collection + if collec.get(ids=ids) == []: + collec.add(ids=ids, documents=documents, metadatas=metadatas) + + return collec diff --git a/structured_query/llm_service_structured_query.py b/structured_query/llm_service_structured_query.py index 4f8224a..a2f113e 100644 --- a/structured_query/llm_service_structured_query.py +++ b/structured_query/llm_service_structured_query.py @@ -8,7 +8,7 @@ sys.path.append("../") sys.path.append("../data") -with open("attribute_info.json", "r") as f: +with open("../data/attribute_info.json", "r") as f: attribute_info = json.loads(f.read()) attribute_info = attribute_info[1:] @@ -52,19 +52,22 @@ from httpx import ConnectTimeout # from llm_service_utils import create_chain, parse_answers_initial from tenacity import retry, retry_if_exception_type, stop_after_attempt +from langchain_community.query_constructors.chroma import ChromaTranslator app = FastAPI() - +print("[INFO] Starting structured query service.") # Create port @app.get("/structuredquery/{query}", response_class=JSONResponse) -@retry(stop=stop_after_attempt(3), retry=retry_if_exception_type(ConnectTimeout)) +@retry(stop=stop_after_attempt(1), retry=retry_if_exception_type(ConnectTimeout)) async def get_structured_query(query: str): """ - Description: Get the query, replace %20 with space and invoke the chain to get the answers based on the prompt - + Description: Get the query, replace %20 with space and invoke the chain to get the answers based on the prompt. """ query = query.replace("%20", " ") response = chain.invoke({"query": query}) - return response + obj = ChromaTranslator() + filter_condition = obj.visit_structured_query(structured_query=response)[1] + + return response, filter_condition diff --git a/useful_scripts/test_chroma_db_from_pandas.ipynb b/useful_scripts/test_chroma_db_from_pandas.ipynb index dd4237b..62c1cbf 100644 --- a/useful_scripts/test_chroma_db_from_pandas.ipynb +++ b/useful_scripts/test_chroma_db_from_pandas.ipynb @@ -2,14 +2,14 @@ "cells": [ { "cell_type": "code", - "execution_count": 21, + "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "/Users/smukherjee/.pyenv/versions/3.10.14/envs/openml/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + "/home/taniya_das/Documents/ai_search/.venv/lib/python3.12/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", " from .autonotebook import tqdm as notebook_tqdm\n" ] } @@ -24,16 +24,16 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ - "metadata_df = pd.read_csv(\"../../data/all_dataset_description.csv\")" + "metadata_df = pd.read_csv(\"../data/all_dataset_description.csv\")" ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -42,7 +42,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -51,25 +51,25 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ - "collec = client.get_or_create_collection(\"metadata\")" + "collec = client.get_or_create_collection(name = \"metadata\")" ] }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 14, "metadata": {}, "outputs": [], "source": [ - "client = chromadb.PersistentClient(path=\"test\")" + "client = chromadb.PersistentClient(path=\"../data/chroma_db_metadata\")" ] }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ @@ -81,18 +81,14 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n", - "To disable this warning, you can either:\n", - "\t- Avoid using `tokenizers` before the fork if possible\n", - "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n", - "100%|██████████| 57/57 [07:16<00:00, 7.66s/it]\n" + "100%|██████████| 57/57 [02:49<00:00, 2.98s/it]\n" ] } ], @@ -114,7 +110,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 90, "metadata": {}, "outputs": [], "source": [ @@ -123,185 +119,55 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 91, + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.DataFrame.from_records(collec.get())" + ] + }, + { + "cell_type": "code", + "execution_count": 92, "metadata": {}, "outputs": [ { "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
datadocumentsembeddingsidsmetadatasuris
0None**Author**: \\n**Source**: Unknown - \\n**Pl...None10{'MajorityClassSize': 81.0, 'MaxNominalAttDist...None
1None**Author**: \\n**Source**: Unknown - Date unk...None1000{'MajorityClassSize': 3481.0, 'MaxNominalAttDi...None
2None**Author**: \\n**Source**: Unknown - Date unk...None1001{'MajorityClassSize': 70.0, 'MaxNominalAttDist...None
3None**Author**: \\n**Source**: Unknown - Date unk...None1002{'MajorityClassSize': 6694.0, 'MaxNominalAttDi...None
4None**Author**: \\n**Source**: Unknown - Date unk...None1003{'MajorityClassSize': 255.0, 'MaxNominalAttDis...None
.....................
5685None**Author**: \\n**Source**: Unknown - Date unk...None995{'MajorityClassSize': 1800.0, 'MaxNominalAttDi...None
5686None**Author**: \\n**Source**: Unknown - Date unk...None996{'MajorityClassSize': 138.0, 'MaxNominalAttDis...None
5687None**Author**: \\n**Source**: Unknown - Date unk...None997{'MajorityClassSize': 337.0, 'MaxNominalAttDis...None
5688None**Author**: \\n**Source**: Unknown - Date unk...None998{'MajorityClassSize': 33.0, 'MaxNominalAttDist...None
5689None**Author**: \\n**Source**: Unknown - Date unk...None999{'MajorityClassSize': 169.0, 'MaxNominalAttDis...None
\n", - "

5690 rows × 6 columns

\n", - "
" - ], "text/plain": [ - " data documents embeddings \\\n", - "0 None **Author**: \\n**Source**: Unknown - \\n**Pl... None \n", - "1 None **Author**: \\n**Source**: Unknown - Date unk... None \n", - "2 None **Author**: \\n**Source**: Unknown - Date unk... None \n", - "3 None **Author**: \\n**Source**: Unknown - Date unk... None \n", - "4 None **Author**: \\n**Source**: Unknown - Date unk... None \n", - "... ... ... ... \n", - "5685 None **Author**: \\n**Source**: Unknown - Date unk... None \n", - "5686 None **Author**: \\n**Source**: Unknown - Date unk... None \n", - "5687 None **Author**: \\n**Source**: Unknown - Date unk... None \n", - "5688 None **Author**: \\n**Source**: Unknown - Date unk... None \n", - "5689 None **Author**: \\n**Source**: Unknown - Date unk... None \n", - "\n", - " ids metadatas uris \n", - "0 10 {'MajorityClassSize': 81.0, 'MaxNominalAttDist... None \n", - "1 1000 {'MajorityClassSize': 3481.0, 'MaxNominalAttDi... None \n", - "2 1001 {'MajorityClassSize': 70.0, 'MaxNominalAttDist... None \n", - "3 1002 {'MajorityClassSize': 6694.0, 'MaxNominalAttDi... None \n", - "4 1003 {'MajorityClassSize': 255.0, 'MaxNominalAttDis... None \n", - "... ... ... ... \n", - "5685 995 {'MajorityClassSize': 1800.0, 'MaxNominalAttDi... None \n", - "5686 996 {'MajorityClassSize': 138.0, 'MaxNominalAttDis... None \n", - "5687 997 {'MajorityClassSize': 337.0, 'MaxNominalAttDis... None \n", - "5688 998 {'MajorityClassSize': 33.0, 'MaxNominalAttDist... None \n", - "5689 999 {'MajorityClassSize': 169.0, 'MaxNominalAttDis... None \n", - "\n", - "[5690 rows x 6 columns]" + "{'MajorityClassSize': 81.0,\n", + " 'MaxNominalAttDistinctValues': 8.0,\n", + " 'MinorityClassSize': 2.0,\n", + " 'NumberOfClasses': 4.0,\n", + " 'NumberOfFeatures': 19.0,\n", + " 'NumberOfInstances': 148.0,\n", + " 'NumberOfInstancesWithMissingValues': 0.0,\n", + " 'NumberOfMissingValues': 0.0,\n", + " 'NumberOfNumericFeatures': 3.0,\n", + " 'NumberOfSymbolicFeatures': 16.0,\n", + " 'Unnamed: 0': 8,\n", + " 'description': \"**Author**: \\n**Source**: Unknown - \\n**Please cite**: \\n\\nCitation Request:\\n This lymphography domain was obtained from the University Medical Centre,\\n Institute of Oncology, Ljubljana, Yugoslavia. Thanks go to M. Zwitter and \\n M. Soklic for providing the data. Please include this citation if you plan\\n to use this database.\\n \\n 1. Title: Lymphography Domain\\n \\n 2. Sources: \\n (a) See Above.\\n (b) Donors: Igor Kononenko, \\n University E.Kardelj\\n Faculty for electrical engineering\\n Trzaska 25\\n 61000 Ljubljana (tel.: (38)(+61) 265-161\\n \\n Bojan Cestnik\\n Jozef Stefan Institute\\n Jamova 39\\n 61000 Ljubljana\\n Yugoslavia (tel.: (38)(+61) 214-399 ext.287) \\n (c) Date: November 1988\\n \\n 3. Past Usage: (sveral)\\n 1. Cestnik,G., Konenenko,I, & Bratko,I. (1987). Assistant-86: A\\n Knowledge-Elicitation Tool for Sophisticated Users. In I.Bratko\\n & N.Lavrac (Eds.) Progress in Machine Learning, 31-45, Sigma Press.\\n -- Assistant-86: 76% accuracy\\n 2. Clark,P. & Niblett,T. (1987). Induction in Noisy Domains. In\\n I.Bratko & N.Lavrac (Eds.) Progress in Machine Learning, 11-30,\\n Sigma Press.\\n -- Simple Bayes: 83% accuracy\\n -- CN2 (99% threshold): 82%\\n 3. Michalski,R., Mozetic,I. Hong,J., & Lavrac,N. (1986). The Multi-Purpose\\n Incremental Learning System AQ15 and its Testing Applications to Three\\n Medical Domains. In Proceedings of the Fifth National Conference on\\n Artificial Intelligence, 1041-1045. Philadelphia, PA: Morgan Kaufmann.\\n -- Experts: 85% accuracy (estimate)\\n -- AQ15: 80-82%\\n \\n 4. Relevant Information:\\n This is one of three domains provided by the Oncology Institute\\n that has repeatedly appeared in the machine learning literature.\\n (See also breast-cancer and primary-tumor.)\\n \\n 5. Number of Instances: 148\\n \\n 6. Number of Attributes: 19 including the class attribute\\n \\n 7. Attribute information:\\n --- NOTE: All attribute values in the database have been entered as\\n numeric values corresponding to their index in the list\\n of attribute values for that attribute domain as given below.\\n 1. class: normal find, metastases, malign lymph, fibrosis\\n 2. lymphatics: normal, arched, deformed, displaced\\n 3. block of affere: no, yes\\n 4. bl. of lymph. c: no, yes\\n 5. bl. of lymph. s: no, yes\\n 6. by pass: no, yes\\n 7. extravasates: no, yes\\n 8. regeneration of: no, yes\\n 9. early uptake in: no, yes\\n 10. lym.nodes dimin: 0-3\\n 11. lym.nodes enlar: 1-4\\n 12. changes in lym.: bean, oval, round\\n 13. defect in node: no, lacunar, lac. marginal, lac. central\\n 14. changes in node: no, lacunar, lac. margin, lac. central\\n 15. changes in stru: no, grainy, drop-like, coarse, diluted, reticular, \\n stripped, faint, \\n 16. special forms: no, chalices, vesicles\\n 17. dislocation of: no, yes\\n 18. exclusion of no: no, yes\\n 19. no. of nodes in: 0-9, 10-19, 20-29, 30-39, 40-49, 50-59, 60-69, >=70\\n \\n 8. Missing Attribute Values: None\\n \\n 9. Class Distribution: \\n Class: Number of Instances:\\n normal find: 2\\n metastases: 81\\n malign lymph: 61\\n fibrosis: 4\\n \\n \\n\\n\\n\\n\\n Relabeled values in attribute 'lymphatics'\\n From: '1' To: normal \\n From: '2' To: arched \\n From: '3' To: deformed \\n From: '4' To: displaced \\n\\n\\n Relabeled values in attribute 'block_of_affere'\\n From: '1' To: no \\n From: '2' To: yes \\n\\n\\n Relabeled values in attribute 'bl_of_lymph_c'\\n From: '1' To: no \\n From: '2' To: yes \\n\\n\\n Relabeled values in attribute 'bl_of_lymph_s'\\n From: '1' To: no \\n From: '2' To: yes \\n\\n\\n Relabeled values in attribute 'by_pass'\\n From: '1' To: no \\n From: '2' To: yes \\n\\n\\n Relabeled values in attribute 'extravasates'\\n From: '1' To: no \\n From: '2' To: yes \\n\\n\\n Relabeled values in attribute 'regeneration_of'\\n From: '1' To: no \\n From: '2' To: yes \\n\\n\\n Relabeled values in attribute 'early_uptake_in'\\n From: '1' To: no \\n From: '2' To: yes \\n\\n\\n Relabeled values in attribute 'changes_in_lym'\\n From: '1' To: bean \\n From: '2' To: oval \\n From: '3' To: round \\n\\n\\n Relabeled values in attribute 'defect_in_node'\\n From: '1' To: no \\n From: '2' To: lacunar \\n From: '3' To: lac_margin \\n From: '4' To: lac_central \\n\\n\\n Relabeled values in attribute 'changes_in_node'\\n From: '1' To: no \\n From: '2' To: lacunar \\n From: '3' To: lac_margin \\n From: '4' To: lac_central \\n\\n\\n Relabeled values in attribute 'changes_in_stru'\\n From: '1' To: no \\n From: '2' To: grainy \\n From: '3' To: drop_like \\n From: '4' To: coarse \\n From: '5' To: diluted \\n From: '6' To: reticular \\n From: '7' To: stripped \\n From: '8' To: faint \\n\\n\\n Relabeled values in attribute 'special_forms'\\n From: '1' To: no \\n From: '2' To: chalices \\n From: '3' To: vesicles \\n\\n\\n Relabeled values in attribute 'dislocation_of'\\n From: '1' To: no \\n From: '2' To: yes \\n\\n\\n Relabeled values in attribute 'exclusion_of_no'\\n From: '1' To: no \\n From: '2' To: yes \\n\\n\\n Relabeled values in attribute 'class'\\n From: '1' To: normal \\n From: '2' To: metastases \\n From: '3' To: malign_lymph \\n From: '4' To: fibrosis\",\n", + " 'did': 10,\n", + " 'features': '0 : [0 - lymphatics (nominal)], 1 : [1 - block_of_affere (nominal)], 2 : [2 - bl_of_lymph_c (nominal)], 3 : [3 - bl_of_lymph_s (nominal)], 4 : [4 - by_pass (nominal)], 5 : [5 - extravasates (nominal)], 6 : [6 - regeneration_of (nominal)], 7 : [7 - early_uptake_in (nominal)], 8 : [8 - lym_nodes_dimin (numeric)], 9 : [9 - lym_nodes_enlar (numeric)], 10 : [10 - changes_in_lym (nominal)], 11 : [11 - defect_in_node (nominal)], 12 : [12 - changes_in_node (nominal)], 13 : [13 - changes_in_stru (nominal)], 14 : [14 - special_forms (nominal)], 15 : [15 - dislocation_of (nominal)], 16 : [16 - exclusion_of_no (nominal)], 17 : [17 - no_of_nodes_in (numeric)], 18 : [18 - class (nominal)],',\n", + " 'format': 'ARFF',\n", + " 'name': 'lymph',\n", + " 'qualities': 'AutoCorrelation : 0.5034013605442177, CfsSubsetEval_DecisionStumpAUC : 0.7924545850419331, CfsSubsetEval_DecisionStumpErrRate : 0.23648648648648649, CfsSubsetEval_DecisionStumpKappa : 0.5474401537655076, CfsSubsetEval_NaiveBayesAUC : 0.7924545850419331, CfsSubsetEval_NaiveBayesErrRate : 0.23648648648648649, CfsSubsetEval_NaiveBayesKappa : 0.5474401537655076, CfsSubsetEval_kNN1NAUC : 0.7924545850419331, CfsSubsetEval_kNN1NErrRate : 0.23648648648648649, CfsSubsetEval_kNN1NKappa : 0.5474401537655076, ClassEntropy : 1.2276775019465804, DecisionStumpAUC : 0.7715656536027917, DecisionStumpErrRate : 0.24324324324324326, DecisionStumpKappa : 0.5316455696202532, Dimensionality : 0.12837837837837837, EquivalentNumberOfAtts : 9.37680223405617, J48.00001.AUC : 0.8035040133716935, J48.00001.ErrRate : 0.24324324324324326, J48.00001.Kappa : 0.55, J48.0001.AUC : 0.8035040133716935, J48.0001.ErrRate : 0.24324324324324326, J48.0001.Kappa : 0.55, J48.001.AUC : 0.8035040133716935, J48.001.ErrRate : 0.24324324324324326, J48.001.Kappa : 0.55, MajorityClassPercentage : 54.729729729729726, MajorityClassSize : 81.0, MaxAttributeEntropy : 2.527125737973009, MaxKurtosisOfNumericAtts : 29.749465128075876, MaxMeansOfNumericAtts : 2.6013513513513518, MaxMutualInformation : 0.40188387586188, MaxNominalAttDistinctValues : 8.0, MaxSkewnessOfNumericAtts : 5.442361694493849, MaxStdDevOfNumericAtts : 1.9050233089611373, MeanAttributeEntropy : 1.1174061851513224, MeanKurtosisOfNumericAtts : 9.883463404163178, MeanMeansOfNumericAtts : 2.045045045045045, MeanMutualInformation : 0.13092709767170999, MeanNoiseToSignalRatio : 7.534567748176438, MeanNominalAttDistinctValues : 3.0, MeanSkewnessOfNumericAtts : 2.326489482053779, MeanStdDevOfNumericAtts : 1.0184023049334343, MinAttributeEntropy : 0.2748031957462935, MinKurtosisOfNumericAtts : -0.5040960482425287, MinMeansOfNumericAtts : 1.060810810810811, MinMutualInformation : 0.02911996300275, MinNominalAttDistinctValues : 2.0, MinSkewnessOfNumericAtts : 0.33379516180165014, MinStdDevOfNumericAtts : 0.3135565426849874, MinorityClassPercentage : 1.3513513513513513, MinorityClassSize : 2.0, NaiveBayesAUC : 0.9083282647773021, NaiveBayesErrRate : 0.1554054054054054, NaiveBayesKappa : 0.7014820661229503, NumberOfBinaryFeatures : 9.0, NumberOfClasses : 4.0, NumberOfFeatures : 19.0, NumberOfInstances : 148.0, NumberOfInstancesWithMissingValues : 0.0, NumberOfMissingValues : 0.0, NumberOfNumericFeatures : 3.0, NumberOfSymbolicFeatures : 16.0, PercentageOfBinaryFeatures : 47.368421052631575, PercentageOfInstancesWithMissingValues : 0.0, PercentageOfMissingValues : 0.0, PercentageOfNumericFeatures : 15.789473684210526, PercentageOfSymbolicFeatures : 84.21052631578947, Quartile1AttributeEntropy : 0.7404482452691425, Quartile1KurtosisOfNumericAtts : -0.5040960482425287, Quartile1MeansOfNumericAtts : 1.060810810810811, Quartile1MutualInformation : 0.0637948721468, Quartile1SkewnessOfNumericAtts : 0.33379516180165014, Quartile1StdDevOfNumericAtts : 0.3135565426849874, Quartile2AttributeEntropy : 0.9915528503834039, Quartile2KurtosisOfNumericAtts : 0.40502113265619144, Quartile2MeansOfNumericAtts : 2.472972972972973, Quartile2MutualInformation : 0.135651202733, Quartile2SkewnessOfNumericAtts : 1.2033115898658382, Quartile2StdDevOfNumericAtts : 0.8366270631541782, Quartile3AttributeEntropy : 1.6082585569929884, Quartile3KurtosisOfNumericAtts : 29.749465128075876, Quartile3MeansOfNumericAtts : 2.6013513513513518, Quartile3MutualInformation : 0.17368798992783, Quartile3SkewnessOfNumericAtts : 5.442361694493849, Quartile3StdDevOfNumericAtts : 1.9050233089611373, REPTreeDepth1AUC : 0.7466579226863443, REPTreeDepth1ErrRate : 0.2905405405405405, REPTreeDepth1Kappa : 0.430361618331543, REPTreeDepth2AUC : 0.7466579226863443, REPTreeDepth2ErrRate : 0.2905405405405405, REPTreeDepth2Kappa : 0.430361618331543, REPTreeDepth3AUC : 0.7466579226863443, REPTreeDepth3ErrRate : 0.2905405405405405, REPTreeDepth3Kappa : 0.430361618331543, RandomTreeDepth1AUC : 0.7576210719961072, RandomTreeDepth1ErrRate : 0.24324324324324326, RandomTreeDepth1Kappa : 0.5295364238410597, RandomTreeDepth2AUC : 0.7576210719961072, RandomTreeDepth2ErrRate : 0.24324324324324326, RandomTreeDepth2Kappa : 0.5295364238410597, RandomTreeDepth3AUC : 0.7576210719961072, RandomTreeDepth3ErrRate : 0.24324324324324326, RandomTreeDepth3Kappa : 0.5295364238410597, StdvNominalAttDistinctValues : 1.591644851508443, kNN1NAUC : 0.8277376333822596, kNN1NErrRate : 0.19594594594594594, kNN1NKappa : 0.6237068209714186,',\n", + " 'status': 'active',\n", + " 'uploader': 1,\n", + " 'version': 1}" ] }, - "execution_count": 31, + "execution_count": 92, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "pd.DataFrame.from_records(collec.get())" + "df['metadatas'][0]" ] }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 94, "metadata": {}, "outputs": [ { @@ -376,7 +242,7 @@ " 'data': None}" ] }, - "execution_count": 27, + "execution_count": 94, "metadata": {}, "output_type": "execute_result" } @@ -387,10 +253,88 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 95, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['__abstractmethods__', '__annotations__', '__class__', '__class_getitem__', '__class_vars__', '__copy__', '__deepcopy__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__fields__', '__fields_set__', '__format__', '__ge__', '__get_pydantic_core_schema__', '__get_pydantic_json_schema__', '__getattr__', '__getattribute__', '__getstate__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__iter__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__pretty__', '__private_attributes__', '__pydantic_complete__', '__pydantic_core_schema__', '__pydantic_custom_init__', '__pydantic_decorators__', '__pydantic_extra__', '__pydantic_fields_set__', '__pydantic_generic_metadata__', '__pydantic_init_subclass__', '__pydantic_parent_namespace__', '__pydantic_post_init__', '__pydantic_private__', '__pydantic_root_model__', '__pydantic_serializer__', '__pydantic_validator__', '__reduce__', '__reduce_ex__', '__repr__', '__repr_args__', '__repr_name__', '__repr_str__', '__rich_repr__', '__setattr__', '__setstate__', '__signature__', '__sizeof__', '__slots__', '__str__', '__subclasshook__', '__weakref__', '_abc_impl', '_calculate_keys', '_check_frozen', '_copy_and_set_values', '_embed', '_get_value', '_iter', '_normalize_embeddings', '_validate_embedding_set', 'add', 'construct', 'copy', 'count', 'database', 'delete', 'dict', 'from_orm', 'get', 'id', 'json', 'metadata', 'model_computed_fields', 'model_config', 'model_construct', 'model_copy', 'model_dump', 'model_dump_json', 'model_extra', 'model_fields', 'model_fields_set', 'model_json_schema', 'model_parametrized_name', 'model_post_init', 'model_rebuild', 'model_validate', 'model_validate_json', 'model_validate_strings', 'modify', 'name', 'parse_file', 'parse_obj', 'parse_raw', 'peek', 'query', 'schema', 'schema_json', 'tenant', 'update', 'update_forward_refs', 'upsert', 'validate']\n" + ] + } + ], + "source": [ + "print(dir(collec))\n" + ] + }, + { + "cell_type": "code", + "execution_count": 105, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'ids': ['44335'],\n", + " 'embeddings': None,\n", + " 'metadatas': [{'MajorityClassSize': 1221.0,\n", + " 'MinorityClassSize': 372.0,\n", + " 'NumberOfClasses': 25.0,\n", + " 'NumberOfFeatures': 3.0,\n", + " 'NumberOfInstances': 15122.0,\n", + " 'NumberOfInstancesWithMissingValues': 0.0,\n", + " 'NumberOfMissingValues': 0.0,\n", + " 'NumberOfNumericFeatures': 0.0,\n", + " 'NumberOfSymbolicFeatures': 0.0,\n", + " 'Unnamed: 0': 4422,\n", + " 'description': '## **Meta-Album Fungi Dataset (Extended)**\\n***\\nMeta-Album Fungi dataset is created by sampling the Danish Fungi 2020 dataset(https://arxiv.org/abs/2103.10107), itself a sampling of the Atlas of Danish Fungi repository. The images and labels which enter this database are sourced by a group consisting of 3 300 citizen botanists, then verified by their peers using a ranking of each person reliability, then finally verified by experts working at the Atlas. Of the 128 classes in the original Danish Fungi 2020 dataset, FNG retains the 25 most populous classes, belonging to six genera, for a total of 15 122 images total, with min 372, and max 1 221 images per class. Each image contains a colored 128x128 image of a fungus or a piece of a fungus from the corresponding class. Because the initial data were of widely varying sizes, we needed to crop a significant portion of the images, which we implemented by taking the largest possible square with center at the middle of the initial image. We then scaled each squared image to the 128x128 standard using the INTER_AREA anti-aliasing filter from Open-CV. \\n\\n\\n\\n### **Dataset Details**\\n![](https://meta-album.github.io/assets/img/samples/FNG.png)\\n\\n**Meta Album ID**: PLT.FNG \\n**Meta Album URL**: [https://meta-album.github.io/datasets/FNG.html](https://meta-album.github.io/datasets/FNG.html) \\n**Domain ID**: PLT \\n**Domain Name**: Plants \\n**Dataset ID**: FNG \\n**Dataset Name**: Fungi \\n**Short Description**: Fungi dataset from Denmark \\n**\\\\# Classes**: 25 \\n**\\\\# Images**: 15122 \\n**Keywords**: fungi, ecology, plants \\n**Data Format**: images \\n**Image size**: 128x128 \\n\\n**License (original data release)**: BSD-3-Clause License \\n**License URL(original data release)**: https://github.com/picekl/DanishFungiDataset/blob/main/LICENSE\\n \\n**License (Meta-Album data release)**: BSD-3-Clause License \\n**License URL (Meta-Album data release)**: [https://github.com/picekl/DanishFungiDataset/blob/main/LICENSE](https://github.com/picekl/DanishFungiDataset/blob/main/LICENSE) \\n\\n**Source**: Danish Fungi Dataset \\n**Source URL**: https://sites.google.com/view/danish-fungi-dataset \\n \\n**Original Author**: Lukas Picek, Milan Sulc, Jiri Matas, Jacob Heilmann-Clausen, Thomas S. Jeppesen, Thomas Laessoe, Tobias Froslev \\n**Original contact**: lukaspicek@gmail.com \\n\\n**Meta Album author**: Felix Herron \\n**Created Date**: 01 March 2022 \\n**Contact Name**: Ihsan Ullah \\n**Contact Email**: meta-album@chalearn.org \\n**Contact URL**: [https://meta-album.github.io/](https://meta-album.github.io/) \\n\\n\\n\\n### **Cite this dataset**\\n```\\n@article{picek2021danish,\\n title={Danish Fungi 2020 - Not Just Another Image Recognition Dataset},\\n author={Lukas Picek and Milan Sulc and Jiri Matas and Jacob Heilmann-Clausen and Thomas S. Jeppesen and Thomas Laessoe and Tobias Froslev},\\n year={2021},\\n eprint={2103.10107},\\n archivePrefix={arXiv},\\n primaryClass={cs.CV}\\n}\\n```\\n\\n\\n### **Cite Meta-Album**\\n```\\n@inproceedings{meta-album-2022,\\n title={Meta-Album: Multi-domain Meta-Dataset for Few-Shot Image Classification},\\n author={Ullah, Ihsan and Carrion, Dustin and Escalera, Sergio and Guyon, Isabelle M and Huisman, Mike and Mohr, Felix and van Rijn, Jan N and Sun, Haozhe and Vanschoren, Joaquin and Vu, Phan Anh},\\n booktitle={Thirty-sixth Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\\n url = {https://meta-album.github.io/},\\n year = {2022}\\n }\\n```\\n\\n\\n### **More**\\nFor more information on the Meta-Album dataset, please see the [[NeurIPS 2022 paper]](https://meta-album.github.io/paper/Meta-Album.pdf) \\nFor details on the dataset preprocessing, please see the [[supplementary materials]](https://openreview.net/attachment?id=70_Wx-dON3q&name=supplementary_material) \\nSupporting code can be found on our [[GitHub repo]](https://github.com/ihsaan-ullah/meta-album) \\nMeta-Album on Papers with Code [[Meta-Album]](https://paperswithcode.com/dataset/meta-album) \\n\\n\\n\\n### **Other versions of this dataset**\\n[[Micro]](https://www.openml.org/d/44272) [[Mini]](https://www.openml.org/d/44302)',\n", + " 'did': 44335,\n", + " 'features': '0 : [0 - FILE_NAME (string)], 1 : [1 - CATEGORY (string)], 2 : [2 - SUPER_CATEGORY (string)],',\n", + " 'format': 'arff',\n", + " 'name': 'Meta_Album_FNG_Extended',\n", + " 'qualities': 'AutoCorrelation : 1.0, Dimensionality : 0.00019838645681788123, MajorityClassPercentage : 8.074328792487766, MajorityClassSize : 1221.0, MinorityClassPercentage : 2.4599920645417273, MinorityClassSize : 372.0, NumberOfBinaryFeatures : 0.0, NumberOfClasses : 25.0, NumberOfFeatures : 3.0, NumberOfInstances : 15122.0, NumberOfInstancesWithMissingValues : 0.0, NumberOfMissingValues : 0.0, NumberOfNumericFeatures : 0.0, NumberOfSymbolicFeatures : 0.0, PercentageOfBinaryFeatures : 0.0, PercentageOfInstancesWithMissingValues : 0.0, PercentageOfMissingValues : 0.0, PercentageOfNumericFeatures : 0.0, PercentageOfSymbolicFeatures : 0.0,',\n", + " 'status': 'active',\n", + " 'uploader': 30980,\n", + " 'version': 1}],\n", + " 'documents': None,\n", + " 'uris': None,\n", + " 'data': None}" + ] + }, + "execution_count": 105, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "filter_condition = {\n", + " 'NumberOfClasses': {'$gte': 25.0}\n", + "}\n", + "import numpy as np\n", + "query_embeddings = np.random.rand(1, 128) \n", + "query_texts = [\"example text\"] \n", + "# filtered_data = collec.query(query_texts=query_texts, where={'metadata': filter_condition})\n", + "collec.get(ids = ['305', '4420', '4429', '4430', '4432', '44335'], where=filter_condition, include=['metadatas'])" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(query_embeddings: Union[Sequence[float], Sequence[int], List[Union[Sequence[float], Sequence[int]]], numpy.ndarray, List[numpy.ndarray], NoneType] = None, query_texts: Union[str, List[str], NoneType] = None, query_images: Union[numpy.ndarray[Any, numpy.dtype[Union[numpy.uint64, numpy.int64, numpy.float64]]], List[numpy.ndarray[Any, numpy.dtype[Union[numpy.uint64, numpy.int64, numpy.float64]]]], NoneType] = None, query_uris: Union[str, List[str], NoneType] = None, n_results: int = 10, where: Optional[Dict[Union[str, Literal['$and'], Literal['$or']], Union[str, int, float, bool, Dict[Union[Literal['$gt'], Literal['$gte'], Literal['$lt'], Literal['$lte'], Literal['$ne'], Literal['$eq'], Literal['$and'], Literal['$or']], Union[str, int, float, bool]], Dict[Union[Literal['$in'], Literal['$nin']], List[Union[str, int, float, bool]]], List[ForwardRef('Where')]]]] = None, where_document: Optional[Dict[Union[Literal['$contains'], Literal['$not_contains'], Literal['$and'], Literal['$or']], Union[str, List[ForwardRef('WhereDocument')]]]] = None, include: List[Union[Literal['documents'], Literal['embeddings'], Literal['metadatas'], Literal['distances'], Literal['uris'], Literal['data']]] = ['metadatas', 'documents', 'distances']) -> chromadb.api.types.QueryResult\n" + ] + } + ], + "source": [ + "import inspect\n", + "print(inspect.signature(collec.query))" + ] } ], "metadata": { @@ -409,7 +353,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.14" + "version": "3.12.0" } }, "nbformat": 4, diff --git a/useful_scripts/selfquery.ipynb b/useful_scripts/test_selfquery.ipynb similarity index 97% rename from useful_scripts/selfquery.ipynb rename to useful_scripts/test_selfquery.ipynb index 227af3c..b75b998 100644 --- a/useful_scripts/selfquery.ipynb +++ b/useful_scripts/test_selfquery.ipynb @@ -9,7 +9,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 1, "metadata": {}, "outputs": [ { @@ -242,7 +242,7 @@ "[5 rows x 21 columns]" ] }, - "execution_count": 9, + "execution_count": 1, "metadata": {}, "output_type": "execute_result" } @@ -256,7 +256,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 2, "metadata": {}, "outputs": [ { @@ -288,7 +288,7 @@ }, { "cell_type": "code", - "execution_count": 37, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -311,7 +311,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -328,7 +328,7 @@ }, { "cell_type": "code", - "execution_count": 38, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -339,7 +339,7 @@ }, { "cell_type": "code", - "execution_count": 42, + "execution_count": 1, "metadata": {}, "outputs": [ { @@ -356,7 +356,7 @@ "{'name': 'MajorityClassSize', 'description': 'Number of instances belonging to the most frequent class.', 'type': 'Numeric'}\n", "{'name': 'MaxNominalAttDistinctValues', 'description': 'The maximum number of distinct values among attributes of the nominal type.', 'type': 'Numeric'}\n", "{'name': 'MinorityClassSize', 'description': 'Number of instances belonging to the least frequent class.', 'type': 'Numeric'}\n", - "{'name': 'NumberOfClasses', 'description': 'Number of classes in the dataset. 2.0 for binary classification, and more than 2.0 for multi-class classification.', 'type': 'Numeric'}\n", + "{'name': 'NumberOfClasses', 'description': 'Number of classes in the dataset. 2.0 for binary classification, and more than 2.0 for multi-class classification.', 'type': 'Float'}\n", "{'name': 'NumberOfFeatures', 'description': 'Number of features or attributes in the dataset.', 'type': 'Numeric'}\n", "{'name': 'NumberOfInstances', 'description': 'Number of instances in the dataset', 'type': 'Numeric'}\n", "{'name': 'NumberOfInstancesWithMissingValues', 'description': 'Number of instances with missing values in the dataset', 'type': 'Numeric'}\n", @@ -390,7 +390,7 @@ }, { "cell_type": "code", - "execution_count": 43, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -402,7 +402,7 @@ }, { "cell_type": "code", - "execution_count": 44, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -447,7 +447,7 @@ }, { "cell_type": "code", - "execution_count": 45, + "execution_count": 4, "metadata": {}, "outputs": [ { @@ -529,7 +529,7 @@ " },\n", " \"NumberOfClasses\": {\n", " \"description\": \"Number of classes in the dataset. 2.0 for binary classification, and more than 2.0 for multi-class classification.\",\n", - " \"type\": \"Numeric\"\n", + " \"type\": \"Float\"\n", " },\n", " \"NumberOfFeatures\": {\n", " \"description\": \"Number of features or attributes in the dataset.\",\n", @@ -669,7 +669,7 @@ }, { "cell_type": "code", - "execution_count": 46, + "execution_count": 5, "metadata": {}, "outputs": [ { @@ -680,7 +680,7 @@ " 'type': 'String'},\n", " {'name': 'NumberOfClasses',\n", " 'description': 'Number of classes in the dataset. 2.0 for binary classification, and more than 2.0 for multi-class classification.',\n", - " 'type': 'Numeric'},\n", + " 'type': 'Float'},\n", " {'name': 'NumberOfFeatures',\n", " 'description': 'Number of features or attributes in the dataset.',\n", " 'type': 'Numeric'},\n", @@ -692,7 +692,7 @@ " 'type': 'String'})" ] }, - "execution_count": 46, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -715,7 +715,7 @@ }, { "cell_type": "code", - "execution_count": 47, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ @@ -731,7 +731,7 @@ }, { "cell_type": "code", - "execution_count": 48, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ @@ -742,14 +742,14 @@ }, { "cell_type": "code", - "execution_count": 49, + "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Search Query: mushroom dataset\n", + "Search Query: mushroom\n", "Filter(s): comparator= attribute='NumberOfInstances' value=10000\n" ] } @@ -761,27 +761,36 @@ }, { "cell_type": "code", - "execution_count": 50, + "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "StructuredQuery(query='mushroom dataset', filter=None, limit=None)" + "langchain_core.structured_query.StructuredQuery" ] }, - "execution_count": 50, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], + "source": [ + "type(structured_query)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "chain.invoke({\"query\": \"Find a mushroom dataset.\"})" ] }, { "cell_type": "code", - "execution_count": 51, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -801,7 +810,7 @@ }, { "cell_type": "code", - "execution_count": 54, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -821,7 +830,7 @@ }, { "cell_type": "code", - "execution_count": 55, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -841,7 +850,7 @@ }, { "cell_type": "code", - "execution_count": 56, + "execution_count": 10, "metadata": {}, "outputs": [ { @@ -850,7 +859,7 @@ "StructuredQuery(query='mushroom dataset', filter=Comparison(comparator=, attribute='NumberOfClasses', value=2.0), limit=None)" ] }, - "execution_count": 56, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } @@ -861,7 +870,7 @@ }, { "cell_type": "code", - "execution_count": 95, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -903,7 +912,7 @@ }, { "cell_type": "code", - "execution_count": 110, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -933,7 +942,7 @@ }, { "cell_type": "code", - "execution_count": 111, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -972,7 +981,7 @@ }, { "cell_type": "code", - "execution_count": 118, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -1008,7 +1017,7 @@ }, { "cell_type": "code", - "execution_count": 120, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -1028,7 +1037,7 @@ }, { "cell_type": "code", - "execution_count": 115, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -1036,17 +1045,16 @@ "output_type": "stream", "text": [ "mushroom dataset\n", - "comparator= attribute='NumberOfClasses' value=2.0\n", - "mushroom dataset {'filter': {'NumberOfClasses': {'$gte': 2.0}}}\n" + "comparator= attribute='NumberOfInstances' value=10000\n" ] }, { "data": { "text/plain": [ - "('mushroom dataset', {'filter': {'NumberOfClasses': {'$gte': 2.0}}})" + "('mushroom dataset', {'filter': {'NumberOfInstances': {'$lt': 10000}}})" ] }, - "execution_count": 115, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } @@ -1063,7 +1071,7 @@ }, { "cell_type": "code", - "execution_count": 116, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -1096,7 +1104,7 @@ }, { "cell_type": "code", - "execution_count": 117, + "execution_count": null, "metadata": {}, "outputs": [ { diff --git a/useful_scripts/test_strutured_query_pipeline.ipynb b/useful_scripts/test_strutured_query_pipeline.ipynb new file mode 100644 index 0000000..c3b3e86 --- /dev/null +++ b/useful_scripts/test_strutured_query_pipeline.ipynb @@ -0,0 +1,1623 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "import json\n", + "from pathlib import Path\n", + "\n", + "import pandas as pd\n", + "\n", + "import sys\n", + "sys.path.append(\"../\")\n", + "from frontend.ui_utils import *" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "with open(\"../backend/config.json\", \"r\") as file:\n", + " config = json.load(file)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Unnamed: 0didnameversionuploaderstatusformatMajorityClassSizeMaxNominalAttDistinctValuesMinorityClassSize...NumberOfFeaturesNumberOfInstancesNumberOfInstancesWithMissingValuesNumberOfMissingValuesNumberOfNumericFeaturesNumberOfSymbolicFeaturesdescriptionqualitiesfeaturesCombined_information
002anneal11activeARFF684.07.08.0...39.0898.0898.022175.06.033.0**Author**: Unknown. Donated by David Sterling...AutoCorrelation : 0.6064659977703456, CfsSubse...0 : [0 - family (nominal)], 1 : [1 - product-t...did - 2, name - anneal, version - 1, uploader ...
113kr-vs-kp11activeARFF1669.03.01527.0...37.03196.00.00.00.037.0Author: Alen Shapiro\\nSource: [UCI](https://ar...AutoCorrelation : 0.9990610328638497, CfsSubse...0 : [0 - bkblk (nominal)], 1 : [1 - bknwy (nom...did - 3, name - kr-vs-kp, version - 1, uploade...
224labor11activeARFF37.03.020.0...17.057.056.0326.08.09.0**Author**: Unknown\\n**Source**: Collective Ba...AutoCorrelation : 0.75, CfsSubsetEval_Decision...0 : [0 - duration (numeric)], 1 : [1 - wage-in...did - 4, name - labor, version - 1, uploader -...
335arrhythmia11activeARFF245.013.02.0...280.0452.0384.0408.0206.074.0**Author**: H. Altay Guvenir, Burak Acar, Hald...AutoCorrelation : 0.35476718403547675, CfsSubs...0 : [0 - age (numeric)], 1 : [1 - sex (nominal...did - 5, name - arrhythmia, version - 1, uploa...
446letter11activeARFF813.026.0734.0...17.020000.00.00.016.01.0**Author**: David J. Slate \\n**Source**: [UCI...AutoCorrelation : 0.04090204510225511, CfsSubs...0 : [0 - x-box (numeric)], 1 : [1 - y-box (num...did - 6, name - letter, version - 1, uploader ...
\n", + "

5 rows × 21 columns

\n", + "
" + ], + "text/plain": [ + " Unnamed: 0 did name version uploader status format \\\n", + "0 0 2 anneal 1 1 active ARFF \n", + "1 1 3 kr-vs-kp 1 1 active ARFF \n", + "2 2 4 labor 1 1 active ARFF \n", + "3 3 5 arrhythmia 1 1 active ARFF \n", + "4 4 6 letter 1 1 active ARFF \n", + "\n", + " MajorityClassSize MaxNominalAttDistinctValues MinorityClassSize ... \\\n", + "0 684.0 7.0 8.0 ... \n", + "1 1669.0 3.0 1527.0 ... \n", + "2 37.0 3.0 20.0 ... \n", + "3 245.0 13.0 2.0 ... \n", + "4 813.0 26.0 734.0 ... \n", + "\n", + " NumberOfFeatures NumberOfInstances NumberOfInstancesWithMissingValues \\\n", + "0 39.0 898.0 898.0 \n", + "1 37.0 3196.0 0.0 \n", + "2 17.0 57.0 56.0 \n", + "3 280.0 452.0 384.0 \n", + "4 17.0 20000.0 0.0 \n", + "\n", + " NumberOfMissingValues NumberOfNumericFeatures NumberOfSymbolicFeatures \\\n", + "0 22175.0 6.0 33.0 \n", + "1 0.0 0.0 37.0 \n", + "2 326.0 8.0 9.0 \n", + "3 408.0 206.0 74.0 \n", + "4 0.0 16.0 1.0 \n", + "\n", + " description \\\n", + "0 **Author**: Unknown. Donated by David Sterling... \n", + "1 Author: Alen Shapiro\\nSource: [UCI](https://ar... \n", + "2 **Author**: Unknown\\n**Source**: Collective Ba... \n", + "3 **Author**: H. Altay Guvenir, Burak Acar, Hald... \n", + "4 **Author**: David J. Slate \\n**Source**: [UCI... \n", + "\n", + " qualities \\\n", + "0 AutoCorrelation : 0.6064659977703456, CfsSubse... \n", + "1 AutoCorrelation : 0.9990610328638497, CfsSubse... \n", + "2 AutoCorrelation : 0.75, CfsSubsetEval_Decision... \n", + "3 AutoCorrelation : 0.35476718403547675, CfsSubs... \n", + "4 AutoCorrelation : 0.04090204510225511, CfsSubs... \n", + "\n", + " features \\\n", + "0 0 : [0 - family (nominal)], 1 : [1 - product-t... \n", + "1 0 : [0 - bkblk (nominal)], 1 : [1 - bknwy (nom... \n", + "2 0 : [0 - duration (numeric)], 1 : [1 - wage-in... \n", + "3 0 : [0 - age (numeric)], 1 : [1 - sex (nominal... \n", + "4 0 : [0 - x-box (numeric)], 1 : [1 - y-box (num... \n", + "\n", + " Combined_information \n", + "0 did - 2, name - anneal, version - 1, uploader ... \n", + "1 did - 3, name - kr-vs-kp, version - 1, uploade... \n", + "2 did - 4, name - labor, version - 1, uploader -... \n", + "3 did - 5, name - arrhythmia, version - 1, uploa... \n", + "4 did - 6, name - letter, version - 1, uploader ... \n", + "\n", + "[5 rows x 21 columns]" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data_metadata_path = Path(config[\"data_dir\"]) / \"all_dataset_description.csv\"\n", + "data_metadata = pd.read_csv(data_metadata_path)\n", + "data_metadata.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 57/57 [00:00<00:00, 83.38it/s]\n" + ] + } + ], + "source": [ + "\n", + "from structured_query.chroma_store_utilis import *\n", + "\n", + "collec = load_chroma_metadata()\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'ids': ['10'],\n", + " 'embeddings': None,\n", + " 'metadatas': [{'MajorityClassSize': 81.0,\n", + " 'MaxNominalAttDistinctValues': 8.0,\n", + " 'MinorityClassSize': 2.0,\n", + " 'NumberOfClasses': 4.0,\n", + " 'NumberOfFeatures': 19.0,\n", + " 'NumberOfInstances': 148.0,\n", + " 'NumberOfInstancesWithMissingValues': 0.0,\n", + " 'NumberOfMissingValues': 0.0,\n", + " 'NumberOfNumericFeatures': 3.0,\n", + " 'NumberOfSymbolicFeatures': 16.0,\n", + " 'Unnamed: 0': 8,\n", + " 'description': \"**Author**: \\n**Source**: Unknown - \\n**Please cite**: \\n\\nCitation Request:\\n This lymphography domain was obtained from the University Medical Centre,\\n Institute of Oncology, Ljubljana, Yugoslavia. Thanks go to M. Zwitter and \\n M. Soklic for providing the data. Please include this citation if you plan\\n to use this database.\\n \\n 1. Title: Lymphography Domain\\n \\n 2. Sources: \\n (a) See Above.\\n (b) Donors: Igor Kononenko, \\n University E.Kardelj\\n Faculty for electrical engineering\\n Trzaska 25\\n 61000 Ljubljana (tel.: (38)(+61) 265-161\\n \\n Bojan Cestnik\\n Jozef Stefan Institute\\n Jamova 39\\n 61000 Ljubljana\\n Yugoslavia (tel.: (38)(+61) 214-399 ext.287) \\n (c) Date: November 1988\\n \\n 3. Past Usage: (sveral)\\n 1. Cestnik,G., Konenenko,I, & Bratko,I. (1987). Assistant-86: A\\n Knowledge-Elicitation Tool for Sophisticated Users. In I.Bratko\\n & N.Lavrac (Eds.) Progress in Machine Learning, 31-45, Sigma Press.\\n -- Assistant-86: 76% accuracy\\n 2. Clark,P. & Niblett,T. (1987). Induction in Noisy Domains. In\\n I.Bratko & N.Lavrac (Eds.) Progress in Machine Learning, 11-30,\\n Sigma Press.\\n -- Simple Bayes: 83% accuracy\\n -- CN2 (99% threshold): 82%\\n 3. Michalski,R., Mozetic,I. Hong,J., & Lavrac,N. (1986). The Multi-Purpose\\n Incremental Learning System AQ15 and its Testing Applications to Three\\n Medical Domains. In Proceedings of the Fifth National Conference on\\n Artificial Intelligence, 1041-1045. Philadelphia, PA: Morgan Kaufmann.\\n -- Experts: 85% accuracy (estimate)\\n -- AQ15: 80-82%\\n \\n 4. Relevant Information:\\n This is one of three domains provided by the Oncology Institute\\n that has repeatedly appeared in the machine learning literature.\\n (See also breast-cancer and primary-tumor.)\\n \\n 5. Number of Instances: 148\\n \\n 6. Number of Attributes: 19 including the class attribute\\n \\n 7. Attribute information:\\n --- NOTE: All attribute values in the database have been entered as\\n numeric values corresponding to their index in the list\\n of attribute values for that attribute domain as given below.\\n 1. class: normal find, metastases, malign lymph, fibrosis\\n 2. lymphatics: normal, arched, deformed, displaced\\n 3. block of affere: no, yes\\n 4. bl. of lymph. c: no, yes\\n 5. bl. of lymph. s: no, yes\\n 6. by pass: no, yes\\n 7. extravasates: no, yes\\n 8. regeneration of: no, yes\\n 9. early uptake in: no, yes\\n 10. lym.nodes dimin: 0-3\\n 11. lym.nodes enlar: 1-4\\n 12. changes in lym.: bean, oval, round\\n 13. defect in node: no, lacunar, lac. marginal, lac. central\\n 14. changes in node: no, lacunar, lac. margin, lac. central\\n 15. changes in stru: no, grainy, drop-like, coarse, diluted, reticular, \\n stripped, faint, \\n 16. special forms: no, chalices, vesicles\\n 17. dislocation of: no, yes\\n 18. exclusion of no: no, yes\\n 19. no. of nodes in: 0-9, 10-19, 20-29, 30-39, 40-49, 50-59, 60-69, >=70\\n \\n 8. Missing Attribute Values: None\\n \\n 9. Class Distribution: \\n Class: Number of Instances:\\n normal find: 2\\n metastases: 81\\n malign lymph: 61\\n fibrosis: 4\\n \\n \\n\\n\\n\\n\\n Relabeled values in attribute 'lymphatics'\\n From: '1' To: normal \\n From: '2' To: arched \\n From: '3' To: deformed \\n From: '4' To: displaced \\n\\n\\n Relabeled values in attribute 'block_of_affere'\\n From: '1' To: no \\n From: '2' To: yes \\n\\n\\n Relabeled values in attribute 'bl_of_lymph_c'\\n From: '1' To: no \\n From: '2' To: yes \\n\\n\\n Relabeled values in attribute 'bl_of_lymph_s'\\n From: '1' To: no \\n From: '2' To: yes \\n\\n\\n Relabeled values in attribute 'by_pass'\\n From: '1' To: no \\n From: '2' To: yes \\n\\n\\n Relabeled values in attribute 'extravasates'\\n From: '1' To: no \\n From: '2' To: yes \\n\\n\\n Relabeled values in attribute 'regeneration_of'\\n From: '1' To: no \\n From: '2' To: yes \\n\\n\\n Relabeled values in attribute 'early_uptake_in'\\n From: '1' To: no \\n From: '2' To: yes \\n\\n\\n Relabeled values in attribute 'changes_in_lym'\\n From: '1' To: bean \\n From: '2' To: oval \\n From: '3' To: round \\n\\n\\n Relabeled values in attribute 'defect_in_node'\\n From: '1' To: no \\n From: '2' To: lacunar \\n From: '3' To: lac_margin \\n From: '4' To: lac_central \\n\\n\\n Relabeled values in attribute 'changes_in_node'\\n From: '1' To: no \\n From: '2' To: lacunar \\n From: '3' To: lac_margin \\n From: '4' To: lac_central \\n\\n\\n Relabeled values in attribute 'changes_in_stru'\\n From: '1' To: no \\n From: '2' To: grainy \\n From: '3' To: drop_like \\n From: '4' To: coarse \\n From: '5' To: diluted \\n From: '6' To: reticular \\n From: '7' To: stripped \\n From: '8' To: faint \\n\\n\\n Relabeled values in attribute 'special_forms'\\n From: '1' To: no \\n From: '2' To: chalices \\n From: '3' To: vesicles \\n\\n\\n Relabeled values in attribute 'dislocation_of'\\n From: '1' To: no \\n From: '2' To: yes \\n\\n\\n Relabeled values in attribute 'exclusion_of_no'\\n From: '1' To: no \\n From: '2' To: yes \\n\\n\\n Relabeled values in attribute 'class'\\n From: '1' To: normal \\n From: '2' To: metastases \\n From: '3' To: malign_lymph \\n From: '4' To: fibrosis\",\n", + " 'did': 10,\n", + " 'features': '0 : [0 - lymphatics (nominal)], 1 : [1 - block_of_affere (nominal)], 2 : [2 - bl_of_lymph_c (nominal)], 3 : [3 - bl_of_lymph_s (nominal)], 4 : [4 - by_pass (nominal)], 5 : [5 - extravasates (nominal)], 6 : [6 - regeneration_of (nominal)], 7 : [7 - early_uptake_in (nominal)], 8 : [8 - lym_nodes_dimin (numeric)], 9 : [9 - lym_nodes_enlar (numeric)], 10 : [10 - changes_in_lym (nominal)], 11 : [11 - defect_in_node (nominal)], 12 : [12 - changes_in_node (nominal)], 13 : [13 - changes_in_stru (nominal)], 14 : [14 - special_forms (nominal)], 15 : [15 - dislocation_of (nominal)], 16 : [16 - exclusion_of_no (nominal)], 17 : [17 - no_of_nodes_in (numeric)], 18 : [18 - class (nominal)],',\n", + " 'format': 'ARFF',\n", + " 'name': 'lymph',\n", + " 'qualities': 'AutoCorrelation : 0.5034013605442177, CfsSubsetEval_DecisionStumpAUC : 0.7924545850419331, CfsSubsetEval_DecisionStumpErrRate : 0.23648648648648649, CfsSubsetEval_DecisionStumpKappa : 0.5474401537655076, CfsSubsetEval_NaiveBayesAUC : 0.7924545850419331, CfsSubsetEval_NaiveBayesErrRate : 0.23648648648648649, CfsSubsetEval_NaiveBayesKappa : 0.5474401537655076, CfsSubsetEval_kNN1NAUC : 0.7924545850419331, CfsSubsetEval_kNN1NErrRate : 0.23648648648648649, CfsSubsetEval_kNN1NKappa : 0.5474401537655076, ClassEntropy : 1.2276775019465804, DecisionStumpAUC : 0.7715656536027917, DecisionStumpErrRate : 0.24324324324324326, DecisionStumpKappa : 0.5316455696202532, Dimensionality : 0.12837837837837837, EquivalentNumberOfAtts : 9.37680223405617, J48.00001.AUC : 0.8035040133716935, J48.00001.ErrRate : 0.24324324324324326, J48.00001.Kappa : 0.55, J48.0001.AUC : 0.8035040133716935, J48.0001.ErrRate : 0.24324324324324326, J48.0001.Kappa : 0.55, J48.001.AUC : 0.8035040133716935, J48.001.ErrRate : 0.24324324324324326, J48.001.Kappa : 0.55, MajorityClassPercentage : 54.729729729729726, MajorityClassSize : 81.0, MaxAttributeEntropy : 2.527125737973009, MaxKurtosisOfNumericAtts : 29.749465128075876, MaxMeansOfNumericAtts : 2.6013513513513518, MaxMutualInformation : 0.40188387586188, MaxNominalAttDistinctValues : 8.0, MaxSkewnessOfNumericAtts : 5.442361694493849, MaxStdDevOfNumericAtts : 1.9050233089611373, MeanAttributeEntropy : 1.1174061851513224, MeanKurtosisOfNumericAtts : 9.883463404163178, MeanMeansOfNumericAtts : 2.045045045045045, MeanMutualInformation : 0.13092709767170999, MeanNoiseToSignalRatio : 7.534567748176438, MeanNominalAttDistinctValues : 3.0, MeanSkewnessOfNumericAtts : 2.326489482053779, MeanStdDevOfNumericAtts : 1.0184023049334343, MinAttributeEntropy : 0.2748031957462935, MinKurtosisOfNumericAtts : -0.5040960482425287, MinMeansOfNumericAtts : 1.060810810810811, MinMutualInformation : 0.02911996300275, MinNominalAttDistinctValues : 2.0, MinSkewnessOfNumericAtts : 0.33379516180165014, MinStdDevOfNumericAtts : 0.3135565426849874, MinorityClassPercentage : 1.3513513513513513, MinorityClassSize : 2.0, NaiveBayesAUC : 0.9083282647773021, NaiveBayesErrRate : 0.1554054054054054, NaiveBayesKappa : 0.7014820661229503, NumberOfBinaryFeatures : 9.0, NumberOfClasses : 4.0, NumberOfFeatures : 19.0, NumberOfInstances : 148.0, NumberOfInstancesWithMissingValues : 0.0, NumberOfMissingValues : 0.0, NumberOfNumericFeatures : 3.0, NumberOfSymbolicFeatures : 16.0, PercentageOfBinaryFeatures : 47.368421052631575, PercentageOfInstancesWithMissingValues : 0.0, PercentageOfMissingValues : 0.0, PercentageOfNumericFeatures : 15.789473684210526, PercentageOfSymbolicFeatures : 84.21052631578947, Quartile1AttributeEntropy : 0.7404482452691425, Quartile1KurtosisOfNumericAtts : -0.5040960482425287, Quartile1MeansOfNumericAtts : 1.060810810810811, Quartile1MutualInformation : 0.0637948721468, Quartile1SkewnessOfNumericAtts : 0.33379516180165014, Quartile1StdDevOfNumericAtts : 0.3135565426849874, Quartile2AttributeEntropy : 0.9915528503834039, Quartile2KurtosisOfNumericAtts : 0.40502113265619144, Quartile2MeansOfNumericAtts : 2.472972972972973, Quartile2MutualInformation : 0.135651202733, Quartile2SkewnessOfNumericAtts : 1.2033115898658382, Quartile2StdDevOfNumericAtts : 0.8366270631541782, Quartile3AttributeEntropy : 1.6082585569929884, Quartile3KurtosisOfNumericAtts : 29.749465128075876, Quartile3MeansOfNumericAtts : 2.6013513513513518, Quartile3MutualInformation : 0.17368798992783, Quartile3SkewnessOfNumericAtts : 5.442361694493849, Quartile3StdDevOfNumericAtts : 1.9050233089611373, REPTreeDepth1AUC : 0.7466579226863443, REPTreeDepth1ErrRate : 0.2905405405405405, REPTreeDepth1Kappa : 0.430361618331543, REPTreeDepth2AUC : 0.7466579226863443, REPTreeDepth2ErrRate : 0.2905405405405405, REPTreeDepth2Kappa : 0.430361618331543, REPTreeDepth3AUC : 0.7466579226863443, REPTreeDepth3ErrRate : 0.2905405405405405, REPTreeDepth3Kappa : 0.430361618331543, RandomTreeDepth1AUC : 0.7576210719961072, RandomTreeDepth1ErrRate : 0.24324324324324326, RandomTreeDepth1Kappa : 0.5295364238410597, RandomTreeDepth2AUC : 0.7576210719961072, RandomTreeDepth2ErrRate : 0.24324324324324326, RandomTreeDepth2Kappa : 0.5295364238410597, RandomTreeDepth3AUC : 0.7576210719961072, RandomTreeDepth3ErrRate : 0.24324324324324326, RandomTreeDepth3Kappa : 0.5295364238410597, StdvNominalAttDistinctValues : 1.591644851508443, kNN1NAUC : 0.8277376333822596, kNN1NErrRate : 0.19594594594594594, kNN1NKappa : 0.6237068209714186,',\n", + " 'status': 'active',\n", + " 'uploader': 1,\n", + " 'version': 1}],\n", + " 'documents': [\"**Author**: \\n**Source**: Unknown - \\n**Please cite**: \\n\\nCitation Request:\\n This lymphography domain was obtained from the University Medical Centre,\\n Institute of Oncology, Ljubljana, Yugoslavia. Thanks go to M. Zwitter and \\n M. Soklic for providing the data. Please include this citation if you plan\\n to use this database.\\n \\n 1. Title: Lymphography Domain\\n \\n 2. Sources: \\n (a) See Above.\\n (b) Donors: Igor Kononenko, \\n University E.Kardelj\\n Faculty for electrical engineering\\n Trzaska 25\\n 61000 Ljubljana (tel.: (38)(+61) 265-161\\n \\n Bojan Cestnik\\n Jozef Stefan Institute\\n Jamova 39\\n 61000 Ljubljana\\n Yugoslavia (tel.: (38)(+61) 214-399 ext.287) \\n (c) Date: November 1988\\n \\n 3. Past Usage: (sveral)\\n 1. Cestnik,G., Konenenko,I, & Bratko,I. (1987). Assistant-86: A\\n Knowledge-Elicitation Tool for Sophisticated Users. In I.Bratko\\n & N.Lavrac (Eds.) Progress in Machine Learning, 31-45, Sigma Press.\\n -- Assistant-86: 76% accuracy\\n 2. Clark,P. & Niblett,T. (1987). Induction in Noisy Domains. In\\n I.Bratko & N.Lavrac (Eds.) Progress in Machine Learning, 11-30,\\n Sigma Press.\\n -- Simple Bayes: 83% accuracy\\n -- CN2 (99% threshold): 82%\\n 3. Michalski,R., Mozetic,I. Hong,J., & Lavrac,N. (1986). The Multi-Purpose\\n Incremental Learning System AQ15 and its Testing Applications to Three\\n Medical Domains. In Proceedings of the Fifth National Conference on\\n Artificial Intelligence, 1041-1045. Philadelphia, PA: Morgan Kaufmann.\\n -- Experts: 85% accuracy (estimate)\\n -- AQ15: 80-82%\\n \\n 4. Relevant Information:\\n This is one of three domains provided by the Oncology Institute\\n that has repeatedly appeared in the machine learning literature.\\n (See also breast-cancer and primary-tumor.)\\n \\n 5. Number of Instances: 148\\n \\n 6. Number of Attributes: 19 including the class attribute\\n \\n 7. Attribute information:\\n --- NOTE: All attribute values in the database have been entered as\\n numeric values corresponding to their index in the list\\n of attribute values for that attribute domain as given below.\\n 1. class: normal find, metastases, malign lymph, fibrosis\\n 2. lymphatics: normal, arched, deformed, displaced\\n 3. block of affere: no, yes\\n 4. bl. of lymph. c: no, yes\\n 5. bl. of lymph. s: no, yes\\n 6. by pass: no, yes\\n 7. extravasates: no, yes\\n 8. regeneration of: no, yes\\n 9. early uptake in: no, yes\\n 10. lym.nodes dimin: 0-3\\n 11. lym.nodes enlar: 1-4\\n 12. changes in lym.: bean, oval, round\\n 13. defect in node: no, lacunar, lac. marginal, lac. central\\n 14. changes in node: no, lacunar, lac. margin, lac. central\\n 15. changes in stru: no, grainy, drop-like, coarse, diluted, reticular, \\n stripped, faint, \\n 16. special forms: no, chalices, vesicles\\n 17. dislocation of: no, yes\\n 18. exclusion of no: no, yes\\n 19. no. of nodes in: 0-9, 10-19, 20-29, 30-39, 40-49, 50-59, 60-69, >=70\\n \\n 8. Missing Attribute Values: None\\n \\n 9. Class Distribution: \\n Class: Number of Instances:\\n normal find: 2\\n metastases: 81\\n malign lymph: 61\\n fibrosis: 4\\n \\n \\n\\n\\n\\n\\n Relabeled values in attribute 'lymphatics'\\n From: '1' To: normal \\n From: '2' To: arched \\n From: '3' To: deformed \\n From: '4' To: displaced \\n\\n\\n Relabeled values in attribute 'block_of_affere'\\n From: '1' To: no \\n From: '2' To: yes \\n\\n\\n Relabeled values in attribute 'bl_of_lymph_c'\\n From: '1' To: no \\n From: '2' To: yes \\n\\n\\n Relabeled values in attribute 'bl_of_lymph_s'\\n From: '1' To: no \\n From: '2' To: yes \\n\\n\\n Relabeled values in attribute 'by_pass'\\n From: '1' To: no \\n From: '2' To: yes \\n\\n\\n Relabeled values in attribute 'extravasates'\\n From: '1' To: no \\n From: '2' To: yes \\n\\n\\n Relabeled values in attribute 'regeneration_of'\\n From: '1' To: no \\n From: '2' To: yes \\n\\n\\n Relabeled values in attribute 'early_uptake_in'\\n From: '1' To: no \\n From: '2' To: yes \\n\\n\\n Relabeled values in attribute 'changes_in_lym'\\n From: '1' To: bean \\n From: '2' To: oval \\n From: '3' To: round \\n\\n\\n Relabeled values in attribute 'defect_in_node'\\n From: '1' To: no \\n From: '2' To: lacunar \\n From: '3' To: lac_margin \\n From: '4' To: lac_central \\n\\n\\n Relabeled values in attribute 'changes_in_node'\\n From: '1' To: no \\n From: '2' To: lacunar \\n From: '3' To: lac_margin \\n From: '4' To: lac_central \\n\\n\\n Relabeled values in attribute 'changes_in_stru'\\n From: '1' To: no \\n From: '2' To: grainy \\n From: '3' To: drop_like \\n From: '4' To: coarse \\n From: '5' To: diluted \\n From: '6' To: reticular \\n From: '7' To: stripped \\n From: '8' To: faint \\n\\n\\n Relabeled values in attribute 'special_forms'\\n From: '1' To: no \\n From: '2' To: chalices \\n From: '3' To: vesicles \\n\\n\\n Relabeled values in attribute 'dislocation_of'\\n From: '1' To: no \\n From: '2' To: yes \\n\\n\\n Relabeled values in attribute 'exclusion_of_no'\\n From: '1' To: no \\n From: '2' To: yes \\n\\n\\n Relabeled values in attribute 'class'\\n From: '1' To: normal \\n From: '2' To: metastases \\n From: '3' To: malign_lymph \\n From: '4' To: fibrosis\"],\n", + " 'uris': None,\n", + " 'data': None}" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "collec.get(ids=[\"10\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [], + "source": [ + "custom_paths_json_path = \"../frontend/paths.json\"\n", + "\n", + "# Create an instance of ResponseParser\n", + "response_parser = ResponseParser(query_type=\"Dataset\", apply_llm_before_rag=True)\n", + "\n", + "# Override the load_paths method\n", + "def custom_load_paths():\n", + " with open(custom_paths_json_path, \"r\") as file:\n", + " return json.load(file)\n", + "\n", + "# Bind the custom load_paths method to the response_parser instance\n", + "response_parser.load_paths = custom_load_paths\n", + "response_parser.paths = response_parser.load_paths()" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "ename": "FileNotFoundError", + "evalue": "[Errno 2] No such file or directory: 'paths.json'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mFileNotFoundError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[19], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m response_parser \u001b[38;5;241m=\u001b[39m \u001b[43mResponseParser\u001b[49m\u001b[43m(\u001b[49m\u001b[43mquery_type\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mDataset\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mapply_llm_before_rag\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m)\u001b[49m\n\u001b[1;32m 2\u001b[0m response_parser\n", + "File \u001b[0;32m~/Documents/ai_search/useful_scripts/../frontend/ui_utils.py:108\u001b[0m, in \u001b[0;36mResponseParser.__init__\u001b[0;34m(self, query_type, apply_llm_before_rag)\u001b[0m\n\u001b[1;32m 106\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__init__\u001b[39m(\u001b[38;5;28mself\u001b[39m, query_type, apply_llm_before_rag\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m):\n\u001b[1;32m 107\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mquery_type \u001b[38;5;241m=\u001b[39m query_type\n\u001b[0;32m--> 108\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mpaths \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mload_paths\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 109\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mrag_response \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 110\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mllm_response \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n", + "File \u001b[0;32m~/Documents/ai_search/useful_scripts/../frontend/ui_utils.py:119\u001b[0m, in \u001b[0;36mResponseParser.load_paths\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 115\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mload_paths\u001b[39m(\u001b[38;5;28mself\u001b[39m):\n\u001b[1;32m 116\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 117\u001b[0m \u001b[38;5;124;03m Description: Load paths from paths.json\u001b[39;00m\n\u001b[1;32m 118\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m--> 119\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28;43mopen\u001b[39;49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mpaths.json\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mr\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m \u001b[38;5;28;01mas\u001b[39;00m file:\n\u001b[1;32m 120\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m json\u001b[38;5;241m.\u001b[39mload(file)\n", + "\u001b[0;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: 'paths.json'" + ] + } + ], + "source": [ + "response_parser = ResponseParser(query_type=\"Dataset\", apply_llm_before_rag=True)\n", + "\n", + "response_parser" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "query_type = \"Dataset\"\n", + "query = \"give me mushrom dataset with 25 classes\"" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[{'query': 'mushroom dataset with 25 classes', 'filter': {'comparator': 'eq', 'attribute': 'NumberOfClasses', 'value': 25}, 'limit': None}, {'filter': {'NumberOfClasses': {'$eq': 25}}}]\n" + ] + } + ], + "source": [ + "structured_query = response_parser.fetch_structured_query(\n", + " query_type=query_type, query=query\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[{'query': 'mushroom dataset',\n", + " 'filter': {'comparator': 'eq', 'attribute': 'NumberOfClasses', 'value': 25},\n", + " 'limit': None},\n", + " {'filter': {'NumberOfClasses': {'$eq': 25}}}]" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "structured_query" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "output = response_parser.fetch_rag_response(\n", + " query_type, structured_query[0][\"query\"]\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "30" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(output['initial_response'])" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [], + "source": [ + "filter_condition = structured_query[1]['filter']\n", + "filter_condition = {'NumberOfClasses': {'$lte': 25.0}}" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['627',\n", + " '623',\n", + " '626',\n", + " '257',\n", + " '245',\n", + " '624',\n", + " '636',\n", + " '628',\n", + " '253',\n", + " '632',\n", + " '633',\n", + " '6419',\n", + " '258',\n", + " '761',\n", + " '6414',\n", + " '634',\n", + " '255',\n", + " '13',\n", + " '249',\n", + " '254',\n", + " '328',\n", + " '635',\n", + " '813',\n", + " '637',\n", + " '629',\n", + " '250',\n", + " '246',\n", + " '165',\n", + " '506',\n", + " '243']" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ids = []\n", + "for id in output['initial_response']:\n", + " ids.append(str(id))\n", + "ids" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'ids': ['13',\n", + " '245',\n", + " '246',\n", + " '249',\n", + " '250',\n", + " '253',\n", + " '254',\n", + " '255',\n", + " '257',\n", + " '258',\n", + " '328',\n", + " '506',\n", + " '623',\n", + " '624',\n", + " '626',\n", + " '627',\n", + " '628',\n", + " '629',\n", + " '632',\n", + " '633',\n", + " '634',\n", + " '635',\n", + " '636',\n", + " '637',\n", + " '761',\n", + " '813'],\n", + " 'embeddings': None,\n", + " 'metadatas': [{'MajorityClassSize': 201.0,\n", + " 'MaxNominalAttDistinctValues': 11.0,\n", + " 'MinorityClassSize': 85.0,\n", + " 'NumberOfClasses': 2.0,\n", + " 'NumberOfFeatures': 10.0,\n", + " 'NumberOfInstances': 286.0,\n", + " 'NumberOfInstancesWithMissingValues': 9.0,\n", + " 'NumberOfMissingValues': 9.0,\n", + " 'NumberOfNumericFeatures': 0.0,\n", + " 'NumberOfSymbolicFeatures': 10.0,\n", + " 'Unnamed: 0': 11,\n", + " 'description': '**Author**: \\n**Source**: Unknown - \\n**Please cite**: \\n\\nCitation Request:\\n This breast cancer domain was obtained from the University Medical Centre,\\n Institute of Oncology, Ljubljana, Yugoslavia. Thanks go to M. Zwitter and \\n M. Soklic for providing the data. Please include this citation if you plan\\n to use this database.\\n \\n 1. Title: Breast cancer data (Michalski has used this)\\n \\n 2. Sources: \\n -- Matjaz Zwitter & Milan Soklic (physicians)\\n Institute of Oncology \\n University Medical Center\\n Ljubljana, Yugoslavia\\n -- Donors: Ming Tan and Jeff Schlimmer (Jeffrey.Schlimmer@a.gp.cs.cmu.edu)\\n -- Date: 11 July 1988\\n \\n 3. Past Usage: (Several: here are some)\\n -- Michalski,R.S., Mozetic,I., Hong,J., & Lavrac,N. (1986). The \\n Multi-Purpose Incremental Learning System AQ15 and its Testing \\n Application to Three Medical Domains. In Proceedings of the \\n Fifth National Conference on Artificial Intelligence, 1041-1045,\\n Philadelphia, PA: Morgan Kaufmann.\\n -- accuracy range: 66%-72%\\n -- Clark,P. & Niblett,T. (1987). Induction in Noisy Domains. In \\n Progress in Machine Learning (from the Proceedings of the 2nd\\n European Working Session on Learning), 11-30, Bled, \\n Yugoslavia: Sigma Press.\\n -- 8 test results given: 65%-72% accuracy range\\n -- Tan, M., & Eshelman, L. (1988). Using weighted networks to \\n represent classification knowledge in noisy domains. Proceedings \\n of the Fifth International Conference on Machine Learning, 121-134,\\n Ann Arbor, MI.\\n -- 4 systems tested: accuracy range was 68%-73.5%\\n -- Cestnik,G., Konenenko,I, & Bratko,I. (1987). Assistant-86: A\\n Knowledge-Elicitation Tool for Sophisticated Users. In I.Bratko\\n & N.Lavrac (Eds.) Progress in Machine Learning, 31-45, Sigma Press.\\n -- Assistant-86: 78% accuracy\\n \\n 4. Relevant Information:\\n This is one of three domains provided by the Oncology Institute\\n that has repeatedly appeared in the machine learning literature.\\n (See also lymphography and primary-tumor.)\\n \\n This data set includes 201 instances of one class and 85 instances of\\n another class. The instances are described by 9 attributes, some of\\n which are linear and some are nominal.\\n \\n 5. Number of Instances: 286\\n \\n 6. Number of Attributes: 9 + the class attribute\\n \\n 7. Attribute Information:\\n 1. Class: no-recurrence-events, recurrence-events\\n 2. age: 10-19, 20-29, 30-39, 40-49, 50-59, 60-69, 70-79, 80-89, 90-99.\\n 3. menopause: lt40, ge40, premeno.\\n 4. tumor-size: 0-4, 5-9, 10-14, 15-19, 20-24, 25-29, 30-34, 35-39, 40-44,\\n 45-49, 50-54, 55-59.\\n 5. inv-nodes: 0-2, 3-5, 6-8, 9-11, 12-14, 15-17, 18-20, 21-23, 24-26,\\n 27-29, 30-32, 33-35, 36-39.\\n 6. node-caps: yes, no.\\n 7. deg-malig: 1, 2, 3.\\n 8. breast: left, right.\\n 9. breast-quad: left-up, left-low, right-up, right-low, central.\\n 10. irradiat: yes, no.\\n \\n 8. Missing Attribute Values: (denoted by \"?\")\\n Attribute #: Number of instances with missing values:\\n 6. 8\\n 9. 1.\\n \\n 9. Class Distribution:\\n 1. no-recurrence-events: 201 instances\\n 2. recurrence-events: 85 instances\\n\\n Num Instances: 286\\n Num Attributes: 10\\n Num Continuous: 0 (Int 0 / Real 0)\\n Num Discrete: 10\\n Missing values: 9 / 0.3%\\n\\n name type enum ints real missing distinct (1)\\n 1 \\'age\\' Enum 100% 0% 0% 0 / 0% 6 / 2% 0% \\n 2 \\'menopause\\' Enum 100% 0% 0% 0 / 0% 3 / 1% 0% \\n 3 \\'tumor-size\\' Enum 100% 0% 0% 0 / 0% 11 / 4% 0% \\n 4 \\'inv-nodes\\' Enum 100% 0% 0% 0 / 0% 7 / 2% 0% \\n 5 \\'node-caps\\' Enum 97% 0% 0% 8 / 3% 2 / 1% 0% \\n 6 \\'deg-malig\\' Enum 100% 0% 0% 0 / 0% 3 / 1% 0% \\n 7 \\'breast\\' Enum 100% 0% 0% 0 / 0% 2 / 1% 0% \\n 8 \\'breast-quad\\' Enum 100% 0% 0% 1 / 0% 5 / 2% 0% \\n 9 \\'irradiat\\' Enum 100% 0% 0% 0 / 0% 2 / 1% 0% \\n 10 \\'Class\\' Enum 100% 0% 0% 0 / 0% 2 / 1% 0%',\n", + " 'did': 13,\n", + " 'features': '0 : [0 - age (nominal)], 1 : [1 - menopause (nominal)], 2 : [2 - tumor-size (nominal)], 3 : [3 - inv-nodes (nominal)], 4 : [4 - node-caps (nominal)], 5 : [5 - deg-malig (nominal)], 6 : [6 - breast (nominal)], 7 : [7 - breast-quad (nominal)], 8 : [8 - irradiat (nominal)], 9 : [9 - Class (nominal)],',\n", + " 'format': 'ARFF',\n", + " 'name': 'breast-cancer',\n", + " 'qualities': 'AutoCorrelation : 0.5684210526315789, CfsSubsetEval_DecisionStumpAUC : 0.6203687445127305, CfsSubsetEval_DecisionStumpErrRate : 0.2972027972027972, CfsSubsetEval_DecisionStumpKappa : 0.2557099993876677, CfsSubsetEval_NaiveBayesAUC : 0.6780841780402799, CfsSubsetEval_NaiveBayesErrRate : 0.2727272727272727, CfsSubsetEval_NaiveBayesKappa : 0.27311827956989254, CfsSubsetEval_kNN1NAUC : 0.6306409130816506, CfsSubsetEval_kNN1NErrRate : 0.3041958041958042, CfsSubsetEval_kNN1NKappa : 0.15407629020194483, ClassEntropy : 0.8778446951746506, DecisionStumpAUC : 0.6203687445127305, DecisionStumpErrRate : 0.2972027972027972, DecisionStumpKappa : 0.2557099993876677, Dimensionality : 0.03496503496503497, EquivalentNumberOfAtts : 26.01097249370415, J48.00001.AUC : 0.4958150424348844, J48.00001.ErrRate : 0.2972027972027972, J48.00001.Kappa : 0.0, J48.0001.AUC : 0.4958150424348844, J48.0001.ErrRate : 0.2972027972027972, J48.0001.Kappa : 0.0, J48.001.AUC : 0.4958150424348844, J48.001.ErrRate : 0.2972027972027972, J48.001.Kappa : 0.0, MajorityClassPercentage : 70.27972027972028, MajorityClassSize : 201.0, MaxAttributeEntropy : 3.0243614350456793, MaxKurtosisOfNumericAtts : nan, MaxMeansOfNumericAtts : nan, MaxMutualInformation : 0.07700985251661, MaxNominalAttDistinctValues : 11.0, MaxSkewnessOfNumericAtts : nan, MaxStdDevOfNumericAtts : nan, MeanAttributeEntropy : 1.5119033534490414, MeanKurtosisOfNumericAtts : nan, MeanMeansOfNumericAtts : nan, MeanMutualInformation : 0.033749014781632225, MeanNoiseToSignalRatio : 43.798444139231265, MeanNominalAttDistinctValues : 4.3, MeanSkewnessOfNumericAtts : nan, MeanStdDevOfNumericAtts : nan, MinAttributeEntropy : 0.7670030768842513, MinKurtosisOfNumericAtts : nan, MinMeansOfNumericAtts : nan, MinMutualInformation : 0.00200161497371, MinNominalAttDistinctValues : 2.0, MinSkewnessOfNumericAtts : nan, MinStdDevOfNumericAtts : nan, MinorityClassPercentage : 29.72027972027972, MinorityClassSize : 85.0, NaiveBayesAUC : 0.6949663447468539, NaiveBayesErrRate : 0.2762237762237762, NaiveBayesKappa : 0.2827756967811567, NumberOfBinaryFeatures : 4.0, NumberOfClasses : 2.0, NumberOfFeatures : 10.0, NumberOfInstances : 286.0, NumberOfInstancesWithMissingValues : 9.0, NumberOfMissingValues : 9.0, NumberOfNumericFeatures : 0.0, NumberOfSymbolicFeatures : 10.0, PercentageOfBinaryFeatures : 40.0, PercentageOfInstancesWithMissingValues : 3.146853146853147, PercentageOfMissingValues : 0.3146853146853147, PercentageOfNumericFeatures : 0.0, PercentageOfSymbolicFeatures : 100.0, Quartile1AttributeEntropy : 0.8942199564092295, Quartile1KurtosisOfNumericAtts : nan, Quartile1MeansOfNumericAtts : nan, Quartile1MutualInformation : 0.0054919850612, Quartile1SkewnessOfNumericAtts : nan, Quartile1StdDevOfNumericAtts : nan, Quartile2AttributeEntropy : 1.3186781167901145, Quartile2KurtosisOfNumericAtts : nan, Quartile2MeansOfNumericAtts : nan, Quartile2MutualInformation : 0.02581902390914, Quartile2SkewnessOfNumericAtts : nan, Quartile2StdDevOfNumericAtts : nan, Quartile3AttributeEntropy : 2.017460690540565, Quartile3KurtosisOfNumericAtts : nan, Quartile3MeansOfNumericAtts : nan, Quartile3MutualInformation : 0.06308310670709, Quartile3SkewnessOfNumericAtts : nan, Quartile3StdDevOfNumericAtts : nan, REPTreeDepth1AUC : 0.5137254901960784, REPTreeDepth1ErrRate : 0.32867132867132864, REPTreeDepth1Kappa : -0.05073086844367987, REPTreeDepth2AUC : 0.48446005267778747, REPTreeDepth2ErrRate : 0.32517482517482516, REPTreeDepth2Kappa : -0.016509974776427364, REPTreeDepth3AUC : 0.48446005267778747, REPTreeDepth3ErrRate : 0.32517482517482516, REPTreeDepth3Kappa : -0.016509974776427364, RandomTreeDepth1AUC : 0.5874158618671349, RandomTreeDepth1ErrRate : 0.2972027972027972, RandomTreeDepth1Kappa : 0.20487996336756734, RandomTreeDepth2AUC : 0.5834942932396839, RandomTreeDepth2ErrRate : 0.2867132867132867, RandomTreeDepth2Kappa : 0.21811028872441177, RandomTreeDepth3AUC : 0.5585308750365817, RandomTreeDepth3ErrRate : 0.32867132867132864, RandomTreeDepth3Kappa : 0.15591836734693884, StdvNominalAttDistinctValues : 2.9832867780352594, kNN1NAUC : 0.5933274802458297, kNN1NErrRate : 0.3006993006993007, kNN1NKappa : 0.18626348177066082,',\n", + " 'status': 'active',\n", + " 'uploader': 1,\n", + " 'version': 1},\n", + " {'MajorityClassSize': 759652.0,\n", + " 'MaxNominalAttDistinctValues': 9.0,\n", + " 'MinorityClassSize': 555.0,\n", + " 'NumberOfClasses': 6.0,\n", + " 'NumberOfFeatures': 39.0,\n", + " 'NumberOfInstances': 1000000.0,\n", + " 'NumberOfInstancesWithMissingValues': 0.0,\n", + " 'NumberOfMissingValues': 0.0,\n", + " 'NumberOfNumericFeatures': 6.0,\n", + " 'NumberOfSymbolicFeatures': 33.0,\n", + " 'Unnamed: 0': 166,\n", + " 'did': 245,\n", + " 'features': '0 : [0 - family (nominal)], 1 : [1 - product-type (nominal)], 2 : [2 - steel (nominal)], 3 : [3 - carbon (numeric)], 4 : [4 - hardness (numeric)], 5 : [5 - temper_rolling (nominal)], 6 : [6 - condition (nominal)], 7 : [7 - formability (nominal)], 8 : [8 - strength (numeric)], 9 : [9 - non-ageing (nominal)], 10 : [10 - surface-finish (nominal)], 11 : [11 - surface-quality (nominal)], 12 : [12 - enamelability (nominal)], 13 : [13 - bc (nominal)], 14 : [14 - bf (nominal)], 15 : [15 - bt (nominal)], 16 : [16 - bw%2Fme (nominal)], 17 : [17 - bl (nominal)], 18 : [18 - m (nominal)], 19 : [19 - chrom (nominal)], 20 : [20 - phos (nominal)], 21 : [21 - cbond (nominal)], 22 : [22 - marvi (nominal)], 23 : [23 - exptl (nominal)], 24 : [24 - ferro (nominal)], 25 : [25 - corr (nominal)], 26 : [26 - blue%2Fbright%2Fvarn%2Fclean (nominal)], 27 : [27 - lustre (nominal)], 28 : [28 - jurofm (nominal)], 29 : [29 - s (nominal)], 30 : [30 - p (nominal)], 31 : [31 - shape (nominal)], 32 : [32 - thick (numeric)], 33 : [33 - width (numeric)], 34 : [34 - len (numeric)], 35 : [35 - oil (nominal)], 36 : [36 - bore (nominal)], 37 : [37 - packing (nominal)], 38 : [38 - class (nominal)],',\n", + " 'format': 'ARFF',\n", + " 'name': 'BNG(anneal.ORIG)',\n", + " 'qualities': 'AutoCorrelation : 0.5966435966435967, CfsSubsetEval_DecisionStumpAUC : 0.9100425379819419, CfsSubsetEval_DecisionStumpErrRate : 0.140681, CfsSubsetEval_DecisionStumpKappa : 0.6220031797879997, CfsSubsetEval_NaiveBayesAUC : 0.9100425379819419, CfsSubsetEval_NaiveBayesErrRate : 0.140681, CfsSubsetEval_NaiveBayesKappa : 0.6220031797879997, CfsSubsetEval_kNN1NAUC : 0.9100425379819419, CfsSubsetEval_kNN1NErrRate : 0.140681, CfsSubsetEval_kNN1NKappa : 0.6220031797879997, ClassEntropy : 1.2034178134061593, DecisionStumpAUC : 0.5879879850231294, DecisionStumpErrRate : 0.240348, DecisionStumpKappa : 0.0, Dimensionality : 3.9e-05, EquivalentNumberOfAtts : 60.11837170459382, J48.00001.AUC : 0.9448491725907372, J48.00001.ErrRate : 0.100573, J48.00001.Kappa : 0.7443968596935974, J48.0001.AUC : 0.9448491725907372, J48.0001.ErrRate : 0.100573, J48.0001.Kappa : 0.7443968596935974, J48.001.AUC : 0.9448491725907372, J48.001.ErrRate : 0.100573, J48.001.Kappa : 0.7443968596935974, MajorityClassPercentage : 75.9652, MajorityClassSize : 759652.0, MaxAttributeEntropy : 1.684380880218795, MaxKurtosisOfNumericAtts : 7.972323991673457, MaxMeansOfNumericAtts : 1303.5986793821992, MaxMutualInformation : 0.22418299169114, MaxNominalAttDistinctValues : 9.0, MaxSkewnessOfNumericAtts : 3.049930007485628, MaxStdDevOfNumericAtts : 1887.984782494524, MeanAttributeEntropy : 0.2550663392586401, MeanKurtosisOfNumericAtts : 2.8203680278627603, MeanMeansOfNumericAtts : 358.9037324354391, MeanMutualInformation : 0.02001747185235562, MeanNoiseToSignalRatio : 11.742185483758997, MeanNominalAttDistinctValues : 2.4242424242424248, MeanSkewnessOfNumericAtts : 1.7379232001015346, MeanStdDevOfNumericAtts : 412.40611146101406, MinAttributeEntropy : -0.0, MinKurtosisOfNumericAtts : -0.868382824192059, MinMeansOfNumericAtts : 1.17735989039, MinMutualInformation : 0.0, MinNominalAttDistinctValues : 1.0, MinSkewnessOfNumericAtts : 0.0777861407829806, MinStdDevOfNumericAtts : 0.8657360725686571, MinorityClassPercentage : 0.05550000000000001, MinorityClassSize : 555.0, NaiveBayesAUC : 0.8801291929503372, NaiveBayesErrRate : 0.190879, NaiveBayesKappa : 0.5362687884269597, NumberOfBinaryFeatures : 4.0, NumberOfClasses : 6.0, NumberOfFeatures : 39.0, NumberOfInstances : 1000000.0, NumberOfInstancesWithMissingValues : 0.0, NumberOfMissingValues : 0.0, NumberOfNumericFeatures : 6.0, NumberOfSymbolicFeatures : 33.0, PercentageOfBinaryFeatures : 10.256410256410255, PercentageOfInstancesWithMissingValues : 0.0, PercentageOfMissingValues : 0.0, PercentageOfNumericFeatures : 15.384615384615385, PercentageOfSymbolicFeatures : 84.61538461538461, Quartile1AttributeEntropy : 0.0, Quartile1KurtosisOfNumericAtts : -0.40342290066472575, Quartile1MeansOfNumericAtts : 4.121271656681081, Quartile1MutualInformation : 0.0, Quartile1SkewnessOfNumericAtts : 0.9465055395022927, Quartile1StdDevOfNumericAtts : 12.199935237910706, Quartile2AttributeEntropy : 0.0, Quartile2KurtosisOfNumericAtts : 1.1822680963070171, Quartile2MeansOfNumericAtts : 29.069557551355082, Quartile2MutualInformation : 0.0, Quartile2SkewnessOfNumericAtts : 1.5129572065602566, Quartile2StdDevOfNumericAtts : 81.31421932163484, Quartile3AttributeEntropy : 0.3151059368331584, Quartile3KurtosisOfNumericAtts : 7.769706297845923, Quartile3MeansOfNumericAtts : 914.9531683394675, Quartile3MutualInformation : 0.0134922075411625, Quartile3SkewnessOfNumericAtts : 3.0408549769799245, Quartile3StdDevOfNumericAtts : 777.2309780706541, REPTreeDepth1AUC : 0.9583107972293207, REPTreeDepth1ErrRate : 0.103254, REPTreeDepth1Kappa : 0.7370276932423044, REPTreeDepth2AUC : 0.9583107972293207, REPTreeDepth2ErrRate : 0.103254, REPTreeDepth2Kappa : 0.7370276932423044, REPTreeDepth3AUC : 0.9583107972293207, REPTreeDepth3ErrRate : 0.103254, REPTreeDepth3Kappa : 0.7370276932423044, RandomTreeDepth1AUC : 0.8355478147338595, RandomTreeDepth1ErrRate : 0.136256, RandomTreeDepth1Kappa : 0.6600393445396219, RandomTreeDepth2AUC : 0.8355478147338595, RandomTreeDepth2ErrRate : 0.136256, RandomTreeDepth2Kappa : 0.6600393445396219, RandomTreeDepth3AUC : 0.8355478147338595, RandomTreeDepth3ErrRate : 0.136256, RandomTreeDepth3Kappa : 0.6600393445396219, StdvNominalAttDistinctValues : 2.136444228009226, kNN1NAUC : 0.8314600775134281, kNN1NErrRate : 0.13295, kNN1NKappa : 0.6662701843447909,',\n", + " 'status': 'active',\n", + " 'uploader': 1,\n", + " 'version': 2},\n", + " {'MajorityClassSize': 647000.0,\n", + " 'MaxNominalAttDistinctValues': 3.0,\n", + " 'MinorityClassSize': 353000.0,\n", + " 'NumberOfClasses': 2.0,\n", + " 'NumberOfFeatures': 17.0,\n", + " 'NumberOfInstances': 1000000.0,\n", + " 'NumberOfInstancesWithMissingValues': 0.0,\n", + " 'NumberOfMissingValues': 0.0,\n", + " 'NumberOfNumericFeatures': 8.0,\n", + " 'NumberOfSymbolicFeatures': 9.0,\n", + " 'Unnamed: 0': 167,\n", + " 'did': 246,\n", + " 'features': '0 : [0 - duration (numeric)], 1 : [1 - wage-increase-first-year (numeric)], 2 : [2 - wage-increase-second-year (numeric)], 3 : [3 - wage-increase-third-year (numeric)], 4 : [4 - cost-of-living-adjustment (nominal)], 5 : [5 - working-hours (numeric)], 6 : [6 - pension (nominal)], 7 : [7 - standby-pay (numeric)], 8 : [8 - shift-differential (numeric)], 9 : [9 - education-allowance (nominal)], 10 : [10 - statutory-holidays (numeric)], 11 : [11 - vacation (nominal)], 12 : [12 - longterm-disability-assistance (nominal)], 13 : [13 - contribution-to-dental-plan (nominal)], 14 : [14 - bereavement-assistance (nominal)], 15 : [15 - contribution-to-health-plan (nominal)], 16 : [16 - class (nominal)],',\n", + " 'format': 'ARFF',\n", + " 'name': 'BNG(labor)',\n", + " 'qualities': 'AutoCorrelation : 0.5437515437515438, CfsSubsetEval_DecisionStumpAUC : 0.9656760571782601, CfsSubsetEval_DecisionStumpErrRate : 0.073896, CfsSubsetEval_DecisionStumpKappa : 0.8361860872136263, CfsSubsetEval_NaiveBayesAUC : 0.9656760571782601, CfsSubsetEval_NaiveBayesErrRate : 0.073896, CfsSubsetEval_NaiveBayesKappa : 0.8361860872136263, CfsSubsetEval_kNN1NAUC : 0.9656760571782601, CfsSubsetEval_kNN1NErrRate : 0.073896, CfsSubsetEval_kNN1NKappa : 0.8361860872136263, ClassEntropy : 0.9367188103082079, DecisionStumpAUC : 0.7610196570793069, DecisionStumpErrRate : 0.17252, DecisionStumpKappa : 0.58038171156978, Dimensionality : 1.7e-05, EquivalentNumberOfAtts : 7.968568094624322, J48.00001.AUC : 0.9722429466178615, J48.00001.ErrRate : 0.04276, J48.00001.Kappa : 0.9060997150754404, J48.0001.AUC : 0.9722429466178615, J48.0001.ErrRate : 0.04276, J48.0001.Kappa : 0.9060997150754404, J48.001.AUC : 0.9722429466178615, J48.001.ErrRate : 0.04276, J48.001.Kappa : 0.9060997150754404, MajorityClassPercentage : 64.7, MajorityClassSize : 647000.0, MaxAttributeEntropy : 1.5686613437902412, MaxKurtosisOfNumericAtts : 12.97183970122256, MaxMeansOfNumericAtts : 38.207419791592, MaxMutualInformation : 0.32803097149722, MaxNominalAttDistinctValues : 3.0, MaxSkewnessOfNumericAtts : 3.219955909368285, MaxStdDevOfNumericAtts : 3.8364066013681923, MeanAttributeEntropy : 1.030150865706383, MeanKurtosisOfNumericAtts : 2.8964429932042943, MeanMeansOfNumericAtts : 8.968741018983373, MeanMutualInformation : 0.117551710568945, MeanNoiseToSignalRatio : 7.763384732731654, MeanNominalAttDistinctValues : 2.5555555555555554, MeanSkewnessOfNumericAtts : 0.34601419937813327, MeanStdDevOfNumericAtts : 1.891451954866257, MinAttributeEntropy : 0.3890238905472041, MinKurtosisOfNumericAtts : -0.971191333863191, MinMeansOfNumericAtts : 2.151549000000004, MinMutualInformation : 0.00031099636463, MinNominalAttDistinctValues : 2.0, MinSkewnessOfNumericAtts : -2.492429789250301, MinStdDevOfNumericAtts : 0.7025670033463544, MinorityClassPercentage : 35.3, MinorityClassSize : 353000.0, NaiveBayesAUC : 0.9555395606219708, NaiveBayesErrRate : 0.094793, NaiveBayesKappa : 0.7925457977917215, NumberOfBinaryFeatures : 4.0, NumberOfClasses : 2.0, NumberOfFeatures : 17.0, NumberOfInstances : 1000000.0, NumberOfInstancesWithMissingValues : 0.0, NumberOfMissingValues : 0.0, NumberOfNumericFeatures : 8.0, NumberOfSymbolicFeatures : 9.0, PercentageOfBinaryFeatures : 23.52941176470588, PercentageOfInstancesWithMissingValues : 0.0, PercentageOfMissingValues : 0.0, PercentageOfNumericFeatures : 47.05882352941176, PercentageOfSymbolicFeatures : 52.94117647058824, Quartile1AttributeEntropy : 0.6564960185674868, Quartile1KurtosisOfNumericAtts : -0.4230542944993998, Quartile1MeansOfNumericAtts : 3.4821577465324913, Quartile1MutualInformation : 0.0406841268226125, Quartile1SkewnessOfNumericAtts : -1.281488718852625, Quartile1StdDevOfNumericAtts : 0.9407666804578398, Quartile2AttributeEntropy : 1.1218408001470537, Quartile2KurtosisOfNumericAtts : 1.7537742946990273, Quartile2MeansOfNumericAtts : 4.105879276327004, Quartile2MutualInformation : 0.082880156145935, Quartile2SkewnessOfNumericAtts : 0.3087406103320252, Quartile2StdDevOfNumericAtts : 1.3717497404114476, Quartile3AttributeEntropy : 1.3559623783139811, Quartile3KurtosisOfNumericAtts : 4.405602715826151, Quartile3MeansOfNumericAtts : 9.656747548797744, Quartile3MutualInformation : 0.1902258165988925, Quartile3SkewnessOfNumericAtts : 1.9521046933055897, Quartile3StdDevOfNumericAtts : 3.1629904450022495, REPTreeDepth1AUC : 0.9835080135775928, REPTreeDepth1ErrRate : 0.044215, REPTreeDepth1Kappa : 0.9028913293511758, REPTreeDepth2AUC : 0.9835080135775928, REPTreeDepth2ErrRate : 0.044215, REPTreeDepth2Kappa : 0.9028913293511758, REPTreeDepth3AUC : 0.9835080135775928, REPTreeDepth3ErrRate : 0.044215, REPTreeDepth3Kappa : 0.9028913293511758, RandomTreeDepth1AUC : 0.9368127614485684, RandomTreeDepth1ErrRate : 0.057895, RandomTreeDepth1Kappa : 0.8732579824692726, RandomTreeDepth2AUC : 0.9368127614485684, RandomTreeDepth2ErrRate : 0.057895, RandomTreeDepth2Kappa : 0.8732579824692726, RandomTreeDepth3AUC : 0.9368127614485684, RandomTreeDepth3ErrRate : 0.057895, RandomTreeDepth3Kappa : 0.8732579824692726, StdvNominalAttDistinctValues : 0.5270462766947299, kNN1NAUC : 0.9369440921752609, kNN1NErrRate : 0.057355, kNN1NKappa : 0.8743678466255509,',\n", + " 'status': 'active',\n", + " 'uploader': 1,\n", + " 'version': 2},\n", + " {'MajorityClassSize': 543495.0,\n", + " 'MaxNominalAttDistinctValues': 8.0,\n", + " 'MinorityClassSize': 16508.0,\n", + " 'NumberOfClasses': 4.0,\n", + " 'NumberOfFeatures': 19.0,\n", + " 'NumberOfInstances': 1000000.0,\n", + " 'NumberOfInstancesWithMissingValues': 0.0,\n", + " 'NumberOfMissingValues': 0.0,\n", + " 'NumberOfNumericFeatures': 3.0,\n", + " 'NumberOfSymbolicFeatures': 16.0,\n", + " 'Unnamed: 0': 170,\n", + " 'did': 249,\n", + " 'features': '0 : [0 - lymphatics (nominal)], 1 : [1 - block_of_affere (nominal)], 2 : [2 - bl_of_lymph_c (nominal)], 3 : [3 - bl_of_lymph_s (nominal)], 4 : [4 - by_pass (nominal)], 5 : [5 - extravasates (nominal)], 6 : [6 - regeneration_of (nominal)], 7 : [7 - early_uptake_in (nominal)], 8 : [8 - lym_nodes_dimin (numeric)], 9 : [9 - lym_nodes_enlar (numeric)], 10 : [10 - changes_in_lym (nominal)], 11 : [11 - defect_in_node (nominal)], 12 : [12 - changes_in_node (nominal)], 13 : [13 - changes_in_stru (nominal)], 14 : [14 - special_forms (nominal)], 15 : [15 - dislocation_of (nominal)], 16 : [16 - exclusion_of_no (nominal)], 17 : [17 - no_of_nodes_in (numeric)], 18 : [18 - class (nominal)],',\n", + " 'format': 'ARFF',\n", + " 'name': 'BNG(lymph)',\n", + " 'qualities': 'AutoCorrelation : 0.46374446374446376, CfsSubsetEval_DecisionStumpAUC : 0.9525893765375607, CfsSubsetEval_DecisionStumpErrRate : 0.106959, CfsSubsetEval_DecisionStumpKappa : 0.7987920883644265, CfsSubsetEval_NaiveBayesAUC : 0.9525893765375607, CfsSubsetEval_NaiveBayesErrRate : 0.106959, CfsSubsetEval_NaiveBayesKappa : 0.7987920883644265, CfsSubsetEval_kNN1NAUC : 0.9525893765375607, CfsSubsetEval_kNN1NErrRate : 0.106959, CfsSubsetEval_kNN1NKappa : 0.7987920883644265, ClassEntropy : 1.2562056674968567, DecisionStumpAUC : 0.7532346598100107, DecisionStumpErrRate : 0.273471, DecisionStumpKappa : 0.4788451693464286, Dimensionality : 1.9e-05, EquivalentNumberOfAtts : 15.260742108126458, J48.00001.AUC : 0.9618119207180185, J48.00001.ErrRate : 0.08244, J48.00001.Kappa : 0.8451590629533324, J48.0001.AUC : 0.9618119207180185, J48.0001.ErrRate : 0.08244, J48.0001.Kappa : 0.8451590629533324, J48.001.AUC : 0.9618119207180185, J48.001.ErrRate : 0.08244, J48.001.Kappa : 0.8451590629533324, MajorityClassPercentage : 54.34949999999999, MajorityClassSize : 543495.0, MaxAttributeEntropy : 2.723239634537262, MaxKurtosisOfNumericAtts : 18.75326070429752, MaxMeansOfNumericAtts : 2.6134477578170023, MaxMutualInformation : 0.26343480307106, MaxNominalAttDistinctValues : 8.0, MaxSkewnessOfNumericAtts : 4.411711376693278, MaxStdDevOfNumericAtts : 1.8756448367786893, MeanAttributeEntropy : 1.1603538163656784, MeanKurtosisOfNumericAtts : 6.157034389713749, MeanMeansOfNumericAtts : 2.0712172526056696, MeanMutualInformation : 0.08231615858496934, MeanNoiseToSignalRatio : 13.096306682823695, MeanNominalAttDistinctValues : 3.0, MeanSkewnessOfNumericAtts : 1.9564480044895443, MeanStdDevOfNumericAtts : 1.0314658867612234, MinAttributeEntropy : 0.3452749744006563, MinKurtosisOfNumericAtts : -0.6385294023659185, MinMeansOfNumericAtts : 1.0865320000000074, MinMutualInformation : 0.00885303321242, MinNominalAttDistinctValues : 2.0, MinSkewnessOfNumericAtts : 0.2818333847334242, MinStdDevOfNumericAtts : 0.3679732984584612, MinorityClassPercentage : 1.6507999999999998, MinorityClassSize : 16508.0, NaiveBayesAUC : 0.9468313845335228, NaiveBayesErrRate : 0.130285, NaiveBayesKappa : 0.7587133376199122, NumberOfBinaryFeatures : 9.0, NumberOfClasses : 4.0, NumberOfFeatures : 19.0, NumberOfInstances : 1000000.0, NumberOfInstancesWithMissingValues : 0.0, NumberOfMissingValues : 0.0, NumberOfNumericFeatures : 3.0, NumberOfSymbolicFeatures : 16.0, PercentageOfBinaryFeatures : 47.368421052631575, PercentageOfInstancesWithMissingValues : 0.0, PercentageOfMissingValues : 0.0, PercentageOfNumericFeatures : 15.789473684210526, PercentageOfSymbolicFeatures : 84.21052631578947, Quartile1AttributeEntropy : 0.7877261423146888, Quartile1KurtosisOfNumericAtts : -0.6385294023659185, Quartile1MeansOfNumericAtts : 1.0865320000000074, Quartile1MutualInformation : 0.03861619480877, Quartile1SkewnessOfNumericAtts : 0.2818333847334242, Quartile1StdDevOfNumericAtts : 0.3679732984584612, Quartile2AttributeEntropy : 0.9913699371659555, Quartile2KurtosisOfNumericAtts : 0.35637186720964387, Quartile2MeansOfNumericAtts : 2.5136719999999984, Quartile2MutualInformation : 0.06535685954581, Quartile2SkewnessOfNumericAtts : 1.1757992520419303, Quartile2StdDevOfNumericAtts : 0.8507795250465198, Quartile3AttributeEntropy : 1.6373342149481895, Quartile3KurtosisOfNumericAtts : 18.75326070429752, Quartile3MeansOfNumericAtts : 2.6134477578170023, Quartile3MutualInformation : 0.11813432242576, Quartile3SkewnessOfNumericAtts : 4.411711376693278, Quartile3StdDevOfNumericAtts : 1.8756448367786893, REPTreeDepth1AUC : 0.9654312871259095, REPTreeDepth1ErrRate : 0.089084, REPTreeDepth1Kappa : 0.8325292469524697, REPTreeDepth2AUC : 0.9654312871259095, REPTreeDepth2ErrRate : 0.089084, REPTreeDepth2Kappa : 0.8325292469524697, REPTreeDepth3AUC : 0.9654312871259095, REPTreeDepth3ErrRate : 0.089084, REPTreeDepth3Kappa : 0.8325292469524697, RandomTreeDepth1AUC : 0.9148261423353723, RandomTreeDepth1ErrRate : 0.10737, RandomTreeDepth1Kappa : 0.7986168101743286, RandomTreeDepth2AUC : 0.9148261423353723, RandomTreeDepth2ErrRate : 0.10737, RandomTreeDepth2Kappa : 0.7986168101743286, RandomTreeDepth3AUC : 0.9148261423353723, RandomTreeDepth3ErrRate : 0.10737, RandomTreeDepth3Kappa : 0.7986168101743286, StdvNominalAttDistinctValues : 1.591644851508443, kNN1NAUC : 0.9389865952001077, kNN1NErrRate : 0.100868, kNN1NKappa : 0.8096255087337635,',\n", + " 'status': 'active',\n", + " 'uploader': 1,\n", + " 'version': 1},\n", + " {'MajorityClassSize': 100515.0,\n", + " 'MaxNominalAttDistinctValues': 10.0,\n", + " 'MinorityClassSize': 99530.0,\n", + " 'NumberOfClasses': 10.0,\n", + " 'NumberOfFeatures': 77.0,\n", + " 'NumberOfInstances': 1000000.0,\n", + " 'NumberOfInstancesWithMissingValues': 0.0,\n", + " 'NumberOfMissingValues': 0.0,\n", + " 'NumberOfNumericFeatures': 76.0,\n", + " 'NumberOfSymbolicFeatures': 1.0,\n", + " 'Unnamed: 0': 171,\n", + " 'did': 250,\n", + " 'features': '0 : [0 - att1 (numeric)], 1 : [1 - att2 (numeric)], 2 : [2 - att3 (numeric)], 3 : [3 - att4 (numeric)], 4 : [4 - att5 (numeric)], 5 : [5 - att6 (numeric)], 6 : [6 - att7 (numeric)], 7 : [7 - att8 (numeric)], 8 : [8 - att9 (numeric)], 9 : [9 - att10 (numeric)], 10 : [10 - att11 (numeric)], 11 : [11 - att12 (numeric)], 12 : [12 - att13 (numeric)], 13 : [13 - att14 (numeric)], 14 : [14 - att15 (numeric)], 15 : [15 - att16 (numeric)], 16 : [16 - att17 (numeric)], 17 : [17 - att18 (numeric)], 18 : [18 - att19 (numeric)], 19 : [19 - att20 (numeric)], 20 : [20 - att21 (numeric)], 21 : [21 - att22 (numeric)], 22 : [22 - att23 (numeric)], 23 : [23 - att24 (numeric)], 24 : [24 - att25 (numeric)], 25 : [25 - att26 (numeric)], 26 : [26 - att27 (numeric)], 27 : [27 - att28 (numeric)], 28 : [28 - att29 (numeric)], 29 : [29 - att30 (numeric)], 30 : [30 - att31 (numeric)], 31 : [31 - att32 (numeric)], 32 : [32 - att33 (numeric)], 33 : [33 - att34 (numeric)], 34 : [34 - att35 (numeric)], 35 : [35 - att36 (numeric)], 36 : [36 - att37 (numeric)], 37 : [37 - att38 (numeric)], 38 : [38 - att39 (numeric)], 39 : [39 - att40 (numeric)], 40 : [40 - att41 (numeric)], 41 : [41 - att42 (numeric)], 42 : [42 - att43 (numeric)], 43 : [43 - att44 (numeric)], 44 : [44 - att45 (numeric)], 45 : [45 - att46 (numeric)], 46 : [46 - att47 (numeric)], 47 : [47 - att48 (numeric)], 48 : [48 - att49 (numeric)], 49 : [49 - att50 (numeric)], 50 : [50 - att51 (numeric)], 51 : [51 - att52 (numeric)], 52 : [52 - att53 (numeric)], 53 : [53 - att54 (numeric)], 54 : [54 - att55 (numeric)], 55 : [55 - att56 (numeric)], 56 : [56 - att57 (numeric)], 57 : [57 - att58 (numeric)], 58 : [58 - att59 (numeric)], 59 : [59 - att60 (numeric)], 60 : [60 - att61 (numeric)], 61 : [61 - att62 (numeric)], 62 : [62 - att63 (numeric)], 63 : [63 - att64 (numeric)], 64 : [64 - att65 (numeric)], 65 : [65 - att66 (numeric)], 66 : [66 - att67 (numeric)], 67 : [67 - att68 (numeric)], 68 : [68 - att69 (numeric)], 69 : [69 - att70 (numeric)], 70 : [70 - att71 (numeric)], 71 : [71 - att72 (numeric)], 72 : [72 - att73 (numeric)], 73 : [73 - att74 (numeric)], 74 : [74 - att75 (numeric)], 75 : [75 - att76 (numeric)], 76 : [76 - class (nominal)],',\n", + " 'format': 'ARFF',\n", + " 'name': 'BNG(mfeat-fourier)',\n", + " 'qualities': 'AutoCorrelation : 0.09995609995609996, CfsSubsetEval_DecisionStumpAUC : 0.8921734291380132, CfsSubsetEval_DecisionStumpErrRate : 0.203381, CfsSubsetEval_DecisionStumpKappa : 0.7740212639581063, CfsSubsetEval_NaiveBayesAUC : 0.8921734291380132, CfsSubsetEval_NaiveBayesErrRate : 0.203381, CfsSubsetEval_NaiveBayesKappa : 0.7740212639581063, CfsSubsetEval_kNN1NAUC : 0.8921734291380132, CfsSubsetEval_kNN1NErrRate : 0.203381, CfsSubsetEval_kNN1NKappa : 0.7740212639581063, ClassEntropy : 3.3219227318547007, DecisionStumpAUC : 0.7109731867646303, DecisionStumpErrRate : 0.805155, DecisionStumpKappa : 0.10529015009179829, Dimensionality : 7.7e-05, EquivalentNumberOfAtts : nan, J48.00001.AUC : 0.8894162192689907, J48.00001.ErrRate : 0.206049, J48.00001.Kappa : 0.7710566731554293, J48.0001.AUC : 0.8894162192689907, J48.0001.ErrRate : 0.206049, J48.0001.Kappa : 0.7710566731554293, J48.001.AUC : 0.8894162192689907, J48.001.ErrRate : 0.206049, J48.001.Kappa : 0.7710566731554293, MajorityClassPercentage : 10.051499999999999, MajorityClassSize : 100515.0, MaxAttributeEntropy : nan, MaxKurtosisOfNumericAtts : 0.6624318898743744, MaxMeansOfNumericAtts : 0.377149516035, MaxMutualInformation : nan, MaxNominalAttDistinctValues : 10.0, MaxSkewnessOfNumericAtts : 0.877977217763201, MaxStdDevOfNumericAtts : 0.1761529503096837, MeanAttributeEntropy : nan, MeanKurtosisOfNumericAtts : -0.24796506036691535, MeanMeansOfNumericAtts : 0.13189745775739473, MeanMutualInformation : nan, MeanNoiseToSignalRatio : nan, MeanNominalAttDistinctValues : 10.0, MeanSkewnessOfNumericAtts : 0.4959078781308409, MeanStdDevOfNumericAtts : 0.06698397495556621, MinAttributeEntropy : nan, MinKurtosisOfNumericAtts : -1.1546436293959819, MinMeansOfNumericAtts : 0.071795535224, MinMutualInformation : nan, MinNominalAttDistinctValues : 10.0, MinSkewnessOfNumericAtts : -0.12215327407774229, MinStdDevOfNumericAtts : 0.03754417585218609, MinorityClassPercentage : 9.953, MinorityClassSize : 99530.0, NaiveBayesAUC : 0.9841125536254328, NaiveBayesErrRate : 0.167491, NaiveBayesKappa : 0.813899488564289, NumberOfBinaryFeatures : 0.0, NumberOfClasses : 10.0, NumberOfFeatures : 77.0, NumberOfInstances : 1000000.0, NumberOfInstancesWithMissingValues : 0.0, NumberOfMissingValues : 0.0, NumberOfNumericFeatures : 76.0, NumberOfSymbolicFeatures : 1.0, PercentageOfBinaryFeatures : 0.0, PercentageOfInstancesWithMissingValues : 0.0, PercentageOfMissingValues : 0.0, PercentageOfNumericFeatures : 98.7012987012987, PercentageOfSymbolicFeatures : 1.2987012987012987, Quartile1AttributeEntropy : nan, Quartile1KurtosisOfNumericAtts : -0.406046313038389, Quartile1MeansOfNumericAtts : 0.08562420371724999, Quartile1MutualInformation : nan, Quartile1SkewnessOfNumericAtts : 0.37830246153929625, Quartile1StdDevOfNumericAtts : 0.042614212594271055, Quartile2AttributeEntropy : nan, Quartile2KurtosisOfNumericAtts : -0.25087992094015243, Quartile2MeansOfNumericAtts : 0.10541226061600001, Quartile2MutualInformation : nan, Quartile2SkewnessOfNumericAtts : 0.5077806078952078, Quartile2StdDevOfNumericAtts : 0.052663183689287144, Quartile3AttributeEntropy : nan, Quartile3KurtosisOfNumericAtts : -0.057890037692025054, Quartile3MeansOfNumericAtts : 0.15621728845225, Quartile3MutualInformation : nan, Quartile3SkewnessOfNumericAtts : 0.6744091647832666, Quartile3StdDevOfNumericAtts : 0.08653997068362977, REPTreeDepth1AUC : 0.9653716870406467, REPTreeDepth1ErrRate : 0.186312, REPTreeDepth1Kappa : 0.7929864520704425, REPTreeDepth2AUC : 0.9653716870406467, REPTreeDepth2ErrRate : 0.186312, REPTreeDepth2Kappa : 0.7929864520704425, REPTreeDepth3AUC : 0.9653716870406467, REPTreeDepth3ErrRate : 0.186312, REPTreeDepth3Kappa : 0.7929864520704425, RandomTreeDepth1AUC : 0.8343781387714035, RandomTreeDepth1ErrRate : 0.298118, RandomTreeDepth1Kappa : 0.6687575197766904, RandomTreeDepth2AUC : 0.8343781387714035, RandomTreeDepth2ErrRate : 0.298118, RandomTreeDepth2Kappa : 0.6687575197766904, RandomTreeDepth3AUC : 0.8343781387714035, RandomTreeDepth3ErrRate : 0.298118, RandomTreeDepth3Kappa : 0.6687575197766904, StdvNominalAttDistinctValues : 0.0, kNN1NAUC : 0.8639133786025055, kNN1NErrRate : 0.244952, kNN1NKappa : 0.7278308053011991,',\n", + " 'status': 'active',\n", + " 'uploader': 1,\n", + " 'version': 1},\n", + " {'MajorityClassSize': 423139.0,\n", + " 'MaxNominalAttDistinctValues': 108.0,\n", + " 'MinorityClassSize': 95207.0,\n", + " 'NumberOfClasses': 6.0,\n", + " 'NumberOfFeatures': 13.0,\n", + " 'NumberOfInstances': 1000000.0,\n", + " 'NumberOfInstancesWithMissingValues': 0.0,\n", + " 'NumberOfMissingValues': 0.0,\n", + " 'NumberOfNumericFeatures': 3.0,\n", + " 'NumberOfSymbolicFeatures': 10.0,\n", + " 'Unnamed: 0': 174,\n", + " 'did': 253,\n", + " 'features': '0 : [0 - IDENTIF (nominal)], 1 : [1 - RIVER (nominal)], 2 : [2 - LOCATION (nominal)], 3 : [3 - ERECTED (numeric)], 4 : [4 - PURPOSE (nominal)], 5 : [5 - LENGTH (numeric)], 6 : [6 - LANES (numeric)], 7 : [7 - CLEAR-G (nominal)], 8 : [8 - T-OR-D (nominal)], 9 : [9 - MATERIAL (nominal)], 10 : [10 - SPAN (nominal)], 11 : [11 - REL-L (nominal)], 12 : [12 - TYPE (nominal)],',\n", + " 'format': 'ARFF',\n", + " 'name': 'BNG(bridges_version1)',\n", + " 'qualities': 'AutoCorrelation : 0.24726624726624727, CfsSubsetEval_DecisionStumpAUC : 0.9013235091316036, CfsSubsetEval_DecisionStumpErrRate : 0.239257, CfsSubsetEval_DecisionStumpKappa : 0.6774847554514949, CfsSubsetEval_NaiveBayesAUC : 0.9013235091316036, CfsSubsetEval_NaiveBayesErrRate : 0.239257, CfsSubsetEval_NaiveBayesKappa : 0.6774847554514949, CfsSubsetEval_kNN1NAUC : 0.9013235091316036, CfsSubsetEval_kNN1NErrRate : 0.239257, CfsSubsetEval_kNN1NKappa : 0.6774847554514949, ClassEntropy : 2.3111914001313076, DecisionStumpAUC : 0.6420991031898386, DecisionStumpErrRate : 0.45358, DecisionStumpKappa : 0.2734045446768764, Dimensionality : 1.3e-05, EquivalentNumberOfAtts : 11.648034501491923, J48.00001.AUC : 0.9277116524218614, J48.00001.ErrRate : 0.222551, J48.00001.Kappa : 0.7011230238646798, J48.0001.AUC : 0.9277116524218614, J48.0001.ErrRate : 0.222551, J48.0001.Kappa : 0.7011230238646798, J48.001.AUC : 0.9277116524218614, J48.001.ErrRate : 0.222551, J48.001.Kappa : 0.7011230238646798, MajorityClassPercentage : 42.3139, MajorityClassSize : 423139.0, MaxAttributeEntropy : 6.741653104159785, MaxKurtosisOfNumericAtts : 2.0172116916268146, MaxMeansOfNumericAtts : 1904.954559552158, MaxMutualInformation : 0.39516698569579, MaxNominalAttDistinctValues : 108.0, MaxSkewnessOfNumericAtts : 1.4769781921355956, MaxStdDevOfNumericAtts : 707.812837301915, MeanAttributeEntropy : 2.334660760876463, MeanKurtosisOfNumericAtts : 0.9285403013167219, MeanMeansOfNumericAtts : 1158.9873723365604, MeanMutualInformation : 0.19841900363836334, MeanNoiseToSignalRatio : 10.766316320847949, MeanNominalAttDistinctValues : 18.899999999999995, MeanSkewnessOfNumericAtts : 0.9263922280697658, MeanStdDevOfNumericAtts : 248.64962036728954, MinAttributeEntropy : 0.6275905895720326, MinKurtosisOfNumericAtts : -0.41831619940171727, MinMeansOfNumericAtts : 2.593721999999993, MinMutualInformation : 0.07202002830771, MinNominalAttDistinctValues : 2.0, MinSkewnessOfNumericAtts : -0.09442560859996699, MinStdDevOfNumericAtts : 1.211040731494273, MinorityClassPercentage : 9.5207, MinorityClassSize : 95207.0, NaiveBayesAUC : 0.9364007601799215, NaiveBayesErrRate : 0.257371, NaiveBayesKappa : 0.6500644214878881, NumberOfBinaryFeatures : 2.0, NumberOfClasses : 6.0, NumberOfFeatures : 13.0, NumberOfInstances : 1000000.0, NumberOfInstancesWithMissingValues : 0.0, NumberOfMissingValues : 0.0, NumberOfNumericFeatures : 3.0, NumberOfSymbolicFeatures : 10.0, PercentageOfBinaryFeatures : 15.384615384615385, PercentageOfInstancesWithMissingValues : 0.0, PercentageOfMissingValues : 0.0, PercentageOfNumericFeatures : 23.076923076923077, PercentageOfSymbolicFeatures : 76.92307692307693, Quartile1AttributeEntropy : 1.039817026284356, Quartile1KurtosisOfNumericAtts : -0.41831619940171727, Quartile1MeansOfNumericAtts : 2.593721999999993, Quartile1MutualInformation : 0.120056977311615, Quartile1SkewnessOfNumericAtts : -0.09442560859996699, Quartile1StdDevOfNumericAtts : 1.211040731494273, Quartile2AttributeEntropy : 1.3607191418046507, Quartile2KurtosisOfNumericAtts : 1.1867254117250687, Quartile2MeansOfNumericAtts : 1569.413835457523, Quartile2MutualInformation : 0.19956188200722, Quartile2SkewnessOfNumericAtts : 1.396624100673669, Quartile2StdDevOfNumericAtts : 36.92498306845929, Quartile3AttributeEntropy : 3.708889792926569, Quartile3KurtosisOfNumericAtts : 2.0172116916268146, Quartile3MeansOfNumericAtts : 1904.954559552158, Quartile3MutualInformation : 0.244558403316625, Quartile3SkewnessOfNumericAtts : 1.4769781921355956, Quartile3StdDevOfNumericAtts : 707.812837301915, REPTreeDepth1AUC : 0.8979153822621934, REPTreeDepth1ErrRate : 0.283543, REPTreeDepth1Kappa : 0.6147398984209902, REPTreeDepth2AUC : 0.8979153822621934, REPTreeDepth2ErrRate : 0.283543, REPTreeDepth2Kappa : 0.6147398984209902, REPTreeDepth3AUC : 0.8979153822621934, REPTreeDepth3ErrRate : 0.283543, REPTreeDepth3Kappa : 0.6147398984209902, RandomTreeDepth1AUC : 0.8287258432327871, RandomTreeDepth1ErrRate : 0.313444, RandomTreeDepth1Kappa : 0.580962925530154, RandomTreeDepth2AUC : 0.8287258432327871, RandomTreeDepth2ErrRate : 0.313444, RandomTreeDepth2Kappa : 0.580962925530154, RandomTreeDepth3AUC : 0.8287258432327871, RandomTreeDepth3ErrRate : 0.313444, RandomTreeDepth3Kappa : 0.580962925530154, StdvNominalAttDistinctValues : 35.13608464875454, kNN1NAUC : 0.803322308049043, kNN1NErrRate : 0.303501, kNN1NKappa : 0.5933954863853881,',\n", + " 'status': 'active',\n", + " 'uploader': 1,\n", + " 'version': 1},\n", + " {'MajorityClassSize': 100289.0,\n", + " 'MaxNominalAttDistinctValues': 10.0,\n", + " 'MinorityClassSize': 99797.0,\n", + " 'NumberOfClasses': 10.0,\n", + " 'NumberOfFeatures': 48.0,\n", + " 'NumberOfInstances': 1000000.0,\n", + " 'NumberOfInstancesWithMissingValues': 0.0,\n", + " 'NumberOfMissingValues': 0.0,\n", + " 'NumberOfNumericFeatures': 47.0,\n", + " 'NumberOfSymbolicFeatures': 1.0,\n", + " 'Unnamed: 0': 175,\n", + " 'did': 254,\n", + " 'features': '0 : [0 - att1 (numeric)], 1 : [1 - att2 (numeric)], 2 : [2 - att3 (numeric)], 3 : [3 - att4 (numeric)], 4 : [4 - att5 (numeric)], 5 : [5 - att6 (numeric)], 6 : [6 - att7 (numeric)], 7 : [7 - att8 (numeric)], 8 : [8 - att9 (numeric)], 9 : [9 - att10 (numeric)], 10 : [10 - att11 (numeric)], 11 : [11 - att12 (numeric)], 12 : [12 - att13 (numeric)], 13 : [13 - att14 (numeric)], 14 : [14 - att15 (numeric)], 15 : [15 - att16 (numeric)], 16 : [16 - att17 (numeric)], 17 : [17 - att18 (numeric)], 18 : [18 - att19 (numeric)], 19 : [19 - att20 (numeric)], 20 : [20 - att21 (numeric)], 21 : [21 - att22 (numeric)], 22 : [22 - att23 (numeric)], 23 : [23 - att24 (numeric)], 24 : [24 - att25 (numeric)], 25 : [25 - att26 (numeric)], 26 : [26 - att27 (numeric)], 27 : [27 - att28 (numeric)], 28 : [28 - att29 (numeric)], 29 : [29 - att30 (numeric)], 30 : [30 - att31 (numeric)], 31 : [31 - att32 (numeric)], 32 : [32 - att33 (numeric)], 33 : [33 - att34 (numeric)], 34 : [34 - att35 (numeric)], 35 : [35 - att36 (numeric)], 36 : [36 - att37 (numeric)], 37 : [37 - att38 (numeric)], 38 : [38 - att39 (numeric)], 39 : [39 - att40 (numeric)], 40 : [40 - att41 (numeric)], 41 : [41 - att42 (numeric)], 42 : [42 - att43 (numeric)], 43 : [43 - att44 (numeric)], 44 : [44 - att45 (numeric)], 45 : [45 - att46 (numeric)], 46 : [46 - att47 (numeric)], 47 : [47 - class (nominal)],',\n", + " 'format': 'ARFF',\n", + " 'name': 'BNG(mfeat-zernike)',\n", + " 'qualities': 'AutoCorrelation : 0.10008510008510009, CfsSubsetEval_DecisionStumpAUC : 0.85175133965445, CfsSubsetEval_DecisionStumpErrRate : 0.290905, CfsSubsetEval_DecisionStumpKappa : 0.6767716935011607, CfsSubsetEval_NaiveBayesAUC : 0.85175133965445, CfsSubsetEval_NaiveBayesErrRate : 0.290905, CfsSubsetEval_NaiveBayesKappa : 0.6767716935011607, CfsSubsetEval_kNN1NAUC : 0.85175133965445, CfsSubsetEval_kNN1NErrRate : 0.290905, CfsSubsetEval_kNN1NKappa : 0.6767716935011607, ClassEntropy : 3.3219261584997604, DecisionStumpAUC : 0.6754668115933892, DecisionStumpErrRate : 0.812069, DecisionStumpKappa : 0.09753866894690903, Dimensionality : 4.8e-05, EquivalentNumberOfAtts : nan, J48.00001.AUC : 0.849088585771426, J48.00001.ErrRate : 0.290867, J48.00001.Kappa : 0.676813849289966, J48.0001.AUC : 0.849088585771426, J48.0001.ErrRate : 0.290867, J48.0001.Kappa : 0.676813849289966, J48.001.AUC : 0.849088585771426, J48.001.ErrRate : 0.290867, J48.001.Kappa : 0.676813849289966, MajorityClassPercentage : 10.0289, MajorityClassSize : 100289.0, MaxAttributeEntropy : nan, MaxKurtosisOfNumericAtts : 2.233970518023533, MaxMeansOfNumericAtts : 507.70880623642, MaxMutualInformation : nan, MaxNominalAttDistinctValues : 10.0, MaxSkewnessOfNumericAtts : 1.5034817027716705, MaxStdDevOfNumericAtts : 124.1855282325309, MeanAttributeEntropy : nan, MeanKurtosisOfNumericAtts : 0.28951301210976677, MeanMeansOfNumericAtts : 88.12085155366289, MeanMutualInformation : nan, MeanNoiseToSignalRatio : nan, MeanNominalAttDistinctValues : 10.0, MeanSkewnessOfNumericAtts : 0.7479523196167397, MeanStdDevOfNumericAtts : 39.712310030701126, MinAttributeEntropy : nan, MinKurtosisOfNumericAtts : -0.9782672605059473, MinMeansOfNumericAtts : 0.078578113189, MinMutualInformation : nan, MinNominalAttDistinctValues : 10.0, MinSkewnessOfNumericAtts : 0.01647016113950717, MinStdDevOfNumericAtts : 0.06662622791953339, MinorityClassPercentage : 9.9797, MinorityClassSize : 99797.0, NaiveBayesAUC : 0.9529759652602074, NaiveBayesErrRate : 0.30974, NaiveBayesKappa : 0.6558491286808068, NumberOfBinaryFeatures : 0.0, NumberOfClasses : 10.0, NumberOfFeatures : 48.0, NumberOfInstances : 1000000.0, NumberOfInstancesWithMissingValues : 0.0, NumberOfMissingValues : 0.0, NumberOfNumericFeatures : 47.0, NumberOfSymbolicFeatures : 1.0, PercentageOfBinaryFeatures : 0.0, PercentageOfInstancesWithMissingValues : 0.0, PercentageOfMissingValues : 0.0, PercentageOfNumericFeatures : 97.91666666666666, PercentageOfSymbolicFeatures : 2.083333333333333, Quartile1AttributeEntropy : nan, Quartile1KurtosisOfNumericAtts : -0.557033653469754, Quartile1MeansOfNumericAtts : 7.3240318655120005, Quartile1MutualInformation : nan, Quartile1SkewnessOfNumericAtts : 0.37749189297088553, Quartile1StdDevOfNumericAtts : 3.520743342449164, Quartile2AttributeEntropy : nan, Quartile2KurtosisOfNumericAtts : 0.023982765584618093, Quartile2MeansOfNumericAtts : 68.78292971411099, Quartile2MutualInformation : nan, Quartile2SkewnessOfNumericAtts : 0.7367392622784438, Quartile2StdDevOfNumericAtts : 37.10292199410516, Quartile3AttributeEntropy : nan, Quartile3KurtosisOfNumericAtts : 1.1153519446718891, Quartile3MeansOfNumericAtts : 126.526860062867, Quartile3MutualInformation : nan, Quartile3SkewnessOfNumericAtts : 1.1253213168970015, Quartile3StdDevOfNumericAtts : 64.82513915389295, REPTreeDepth1AUC : 0.9369537126524267, REPTreeDepth1ErrRate : 0.271281, REPTreeDepth1Kappa : 0.6985768944698579, REPTreeDepth2AUC : 0.9369537126524267, REPTreeDepth2ErrRate : 0.271281, REPTreeDepth2Kappa : 0.6985768944698579, REPTreeDepth3AUC : 0.9369537126524267, REPTreeDepth3ErrRate : 0.271281, REPTreeDepth3Kappa : 0.6985768944698579, RandomTreeDepth1AUC : 0.8057229966595045, RandomTreeDepth1ErrRate : 0.349693, RandomTreeDepth1Kappa : 0.6114522203235186, RandomTreeDepth2AUC : 0.8057229966595045, RandomTreeDepth2ErrRate : 0.349693, RandomTreeDepth2Kappa : 0.6114522203235186, RandomTreeDepth3AUC : 0.8057229966595045, RandomTreeDepth3ErrRate : 0.349693, RandomTreeDepth3Kappa : 0.6114522203235186, StdvNominalAttDistinctValues : 0.0, kNN1NAUC : 0.8508827901265956, kNN1NErrRate : 0.268404, kNN1NKappa : 0.7017730453385455,',\n", + " 'status': 'active',\n", + " 'uploader': 1,\n", + " 'version': 1},\n", + " {'MajorityClassSize': 23567.0,\n", + " 'MaxNominalAttDistinctValues': 4.0,\n", + " 'MinorityClassSize': 12447.0,\n", + " 'NumberOfClasses': 3.0,\n", + " 'NumberOfFeatures': 10.0,\n", + " 'NumberOfInstances': 55296.0,\n", + " 'NumberOfInstancesWithMissingValues': 0.0,\n", + " 'NumberOfMissingValues': 0.0,\n", + " 'NumberOfNumericFeatures': 2.0,\n", + " 'NumberOfSymbolicFeatures': 8.0,\n", + " 'Unnamed: 0': 176,\n", + " 'did': 255,\n", + " 'features': '0 : [0 - Wifes_age (numeric)], 1 : [1 - Wifes_education (nominal)], 2 : [2 - Husbands_education (nominal)], 3 : [3 - Number_of_children_ever_born (numeric)], 4 : [4 - Wifes_religion (nominal)], 5 : [5 - Wifes_now_working%3F (nominal)], 6 : [6 - Husbands_occupation (nominal)], 7 : [7 - Standard-of-living_index (nominal)], 8 : [8 - Media_exposure (nominal)], 9 : [9 - Contraceptive_method_used (nominal)],',\n", + " 'format': 'ARFF',\n", + " 'name': 'BNG(cmc)',\n", + " 'qualities': 'AutoCorrelation : 0.35509539741387103, CfsSubsetEval_DecisionStumpAUC : 0.7079557728582438, CfsSubsetEval_DecisionStumpErrRate : 0.44288917824074076, CfsSubsetEval_DecisionStumpKappa : 0.3113651469019138, CfsSubsetEval_NaiveBayesAUC : 0.7079557728582438, CfsSubsetEval_NaiveBayesErrRate : 0.44288917824074076, CfsSubsetEval_NaiveBayesKappa : 0.3113651469019138, CfsSubsetEval_kNN1NAUC : 0.7079557728582438, CfsSubsetEval_kNN1NErrRate : 0.44288917824074076, CfsSubsetEval_kNN1NKappa : 0.3113651469019138, ClassEntropy : 1.5386700242390772, DecisionStumpAUC : 0.5778705088269857, DecisionStumpErrRate : 0.5523365162037037, DecisionStumpKappa : 0.15217830175565578, Dimensionality : 0.0001808449074074074, EquivalentNumberOfAtts : 54.028852146140544, J48.00001.AUC : 0.7009205299199321, J48.00001.ErrRate : 0.4432508680555556, J48.00001.Kappa : 0.30994558745610484, J48.0001.AUC : 0.7009205299199321, J48.0001.ErrRate : 0.4432508680555556, J48.0001.Kappa : 0.30994558745610484, J48.001.AUC : 0.7009205299199321, J48.001.ErrRate : 0.4432508680555556, J48.001.Kappa : 0.30994558745610484, MajorityClassPercentage : 42.6197193287037, MajorityClassSize : 23567.0, MaxAttributeEntropy : 1.8711440836777449, MaxKurtosisOfNumericAtts : 0.7706791387138678, MaxMeansOfNumericAtts : 32.56203415013744, MaxMutualInformation : 0.07116423354502, MaxNominalAttDistinctValues : 4.0, MaxSkewnessOfNumericAtts : 1.010292874296068, MaxStdDevOfNumericAtts : 8.227193796131264, MeanAttributeEntropy : 1.2302210371344358, MeanKurtosisOfNumericAtts : -0.09287618208151649, MeanMeansOfNumericAtts : 17.913357144033927, MeanMutualInformation : 0.02847867321106857, MeanNoiseToSignalRatio : 42.197975833238466, MeanNominalAttDistinctValues : 3.125, MeanSkewnessOfNumericAtts : 0.6349896367565343, MeanStdDevOfNumericAtts : 5.283469130973664, MinAttributeEntropy : 0.39140313050723713, MinKurtosisOfNumericAtts : -0.9564315028769008, MinMeansOfNumericAtts : 3.2646801379304105, MinMutualInformation : 0.00311962294887, MinNominalAttDistinctValues : 2.0, MinSkewnessOfNumericAtts : 0.2596863992170007, MinStdDevOfNumericAtts : 2.3397444658160653, MinorityClassPercentage : 22.509765625, MinorityClassSize : 12447.0, NaiveBayesAUC : 0.6899931165081491, NaiveBayesErrRate : 0.48835358796296297, NaiveBayesKappa : 0.25838684658393896, NumberOfBinaryFeatures : 3.0, NumberOfClasses : 3.0, NumberOfFeatures : 10.0, NumberOfInstances : 55296.0, NumberOfInstancesWithMissingValues : 0.0, NumberOfMissingValues : 0.0, NumberOfNumericFeatures : 2.0, NumberOfSymbolicFeatures : 8.0, PercentageOfBinaryFeatures : 30.0, PercentageOfInstancesWithMissingValues : 0.0, PercentageOfMissingValues : 0.0, PercentageOfNumericFeatures : 20.0, PercentageOfSymbolicFeatures : 80.0, Quartile1AttributeEntropy : 0.612350853498725, Quartile1KurtosisOfNumericAtts : -0.9564315028769008, Quartile1MeansOfNumericAtts : 3.2646801379304105, Quartile1MutualInformation : 0.0100154702513, Quartile1SkewnessOfNumericAtts : 0.2596863992170007, Quartile1StdDevOfNumericAtts : 2.3397444658160653, Quartile2AttributeEntropy : 1.4674317458734099, Quartile2KurtosisOfNumericAtts : -0.09287618208151649, Quartile2MeansOfNumericAtts : 17.913357144033924, Quartile2MutualInformation : 0.03022761438771, Quartile2SkewnessOfNumericAtts : 0.6349896367565344, Quartile2StdDevOfNumericAtts : 5.283469130973664, Quartile3AttributeEntropy : 1.7667707988363959, Quartile3KurtosisOfNumericAtts : 0.7706791387138678, Quartile3MeansOfNumericAtts : 32.56203415013744, Quartile3MutualInformation : 0.03923562284896, Quartile3SkewnessOfNumericAtts : 1.010292874296068, Quartile3StdDevOfNumericAtts : 8.227193796131264, REPTreeDepth1AUC : 0.7074435114638705, REPTreeDepth1ErrRate : 0.45361328125, REPTreeDepth1Kappa : 0.2924086118531553, REPTreeDepth2AUC : 0.7074435114638705, REPTreeDepth2ErrRate : 0.45361328125, REPTreeDepth2Kappa : 0.2924086118531553, REPTreeDepth3AUC : 0.7074435114638705, REPTreeDepth3ErrRate : 0.45361328125, REPTreeDepth3Kappa : 0.2924086118531553, RandomTreeDepth1AUC : 0.5902685503090401, RandomTreeDepth1ErrRate : 0.5304904513888888, RandomTreeDepth1Kappa : 0.1795787598849843, RandomTreeDepth2AUC : 0.5902685503090401, RandomTreeDepth2ErrRate : 0.5304904513888888, RandomTreeDepth2Kappa : 0.1795787598849843, RandomTreeDepth3AUC : 0.5902685503090401, RandomTreeDepth3ErrRate : 0.5304904513888888, RandomTreeDepth3Kappa : 0.1795787598849843, StdvNominalAttDistinctValues : 0.9910312089651149, kNN1NAUC : 0.5928637737633861, kNN1NErrRate : 0.5276150173611112, kNN1NKappa : 0.18486530131892978,',\n", + " 'status': 'active',\n", + " 'uploader': 1,\n", + " 'version': 1},\n", + " {'MajorityClassSize': 630221.0,\n", + " 'MaxNominalAttDistinctValues': 6.0,\n", + " 'MinorityClassSize': 369779.0,\n", + " 'NumberOfClasses': 2.0,\n", + " 'NumberOfFeatures': 23.0,\n", + " 'NumberOfInstances': 1000000.0,\n", + " 'NumberOfInstancesWithMissingValues': 0.0,\n", + " 'NumberOfMissingValues': 0.0,\n", + " 'NumberOfNumericFeatures': 7.0,\n", + " 'NumberOfSymbolicFeatures': 16.0,\n", + " 'Unnamed: 0': 178,\n", + " 'did': 257,\n", + " 'features': '0 : [0 - surgery (nominal)], 1 : [1 - Age (nominal)], 2 : [2 - rectal_temperature (numeric)], 3 : [3 - pulse (numeric)], 4 : [4 - respiratory_rate (numeric)], 5 : [5 - temp_extremities (nominal)], 6 : [6 - peripheral_pulse (nominal)], 7 : [7 - mucous_membranes (nominal)], 8 : [8 - capillary_refill_time (nominal)], 9 : [9 - pain (nominal)], 10 : [10 - peristalsis (nominal)], 11 : [11 - abdominal_distension (nominal)], 12 : [12 - nasogastric_tube (nominal)], 13 : [13 - nasogastric_reflux (nominal)], 14 : [14 - nasogastric_reflux_PH (numeric)], 15 : [15 - rectal_examination (nominal)], 16 : [16 - abdomen (nominal)], 17 : [17 - packed_cell_volume (numeric)], 18 : [18 - total_protein (numeric)], 19 : [19 - abdominocentesis_appearance (nominal)], 20 : [20 - abdomcentesis_total_protein (numeric)], 21 : [21 - outcome (nominal)], 22 : [22 - surgical_lesion (nominal)],',\n", + " 'format': 'ARFF',\n", + " 'name': 'BNG(colic)',\n", + " 'qualities': 'AutoCorrelation : 0.5336065336065337, CfsSubsetEval_DecisionStumpAUC : 0.9206842128679066, CfsSubsetEval_DecisionStumpErrRate : 0.12393, CfsSubsetEval_DecisionStumpKappa : 0.7321887731482503, CfsSubsetEval_NaiveBayesAUC : 0.9206842128679066, CfsSubsetEval_NaiveBayesErrRate : 0.12393, CfsSubsetEval_NaiveBayesKappa : 0.7321887731482503, CfsSubsetEval_kNN1NAUC : 0.9206842128679066, CfsSubsetEval_kNN1NErrRate : 0.12393, CfsSubsetEval_kNN1NKappa : 0.7321887731482503, ClassEntropy : 0.9505022518627755, DecisionStumpAUC : 0.811385945949615, DecisionStumpErrRate : 0.186838, DecisionStumpKappa : 0.608630118017707, Dimensionality : 2.3e-05, EquivalentNumberOfAtts : 15.685590400027284, J48.00001.AUC : 0.9108771598016026, J48.00001.ErrRate : 0.1097, J48.00001.Kappa : 0.7630655125498026, J48.0001.AUC : 0.9108771598016026, J48.0001.ErrRate : 0.1097, J48.0001.Kappa : 0.7630655125498026, J48.001.AUC : 0.9108771598016026, J48.001.ErrRate : 0.1097, J48.001.Kappa : 0.7630655125498026, MajorityClassPercentage : 63.0221, MajorityClassSize : 630221.0, MaxAttributeEntropy : 2.3152113001090875, MaxKurtosisOfNumericAtts : 9.122841875050248, MaxMeansOfNumericAtts : 73.500319272697, MaxMutualInformation : 0.28348205762599, MaxNominalAttDistinctValues : 6.0, MaxSkewnessOfNumericAtts : 2.836944443581676, MaxStdDevOfNumericAtts : 29.62277362827926, MeanAttributeEntropy : 1.4250647630212925, MeanKurtosisOfNumericAtts : 2.219748656802744, MeanMeansOfNumericAtts : 32.342938429725855, MeanMutualInformation : 0.060597161319546004, MeanNoiseToSignalRatio : 22.51702178764649, MeanNominalAttDistinctValues : 3.5625, MeanSkewnessOfNumericAtts : 0.546863770829829, MeanStdDevOfNumericAtts : 12.953669183737068, MinAttributeEntropy : 0.3972766056576088, MinKurtosisOfNumericAtts : -0.7051628596798087, MinMeansOfNumericAtts : 2.32043489956, MinMutualInformation : 0.00037890773564, MinNominalAttDistinctValues : 2.0, MinSkewnessOfNumericAtts : -2.3974153751221685, MinStdDevOfNumericAtts : 0.6536398857393563, MinorityClassPercentage : 36.977900000000005, MinorityClassSize : 369779.0, NaiveBayesAUC : 0.9170219539583531, NaiveBayesErrRate : 0.148509, NaiveBayesKappa : 0.6814474093166725, NumberOfBinaryFeatures : 3.0, NumberOfClasses : 2.0, NumberOfFeatures : 23.0, NumberOfInstances : 1000000.0, NumberOfInstancesWithMissingValues : 0.0, NumberOfMissingValues : 0.0, NumberOfNumericFeatures : 7.0, NumberOfSymbolicFeatures : 16.0, PercentageOfBinaryFeatures : 13.043478260869565, PercentageOfInstancesWithMissingValues : 0.0, PercentageOfMissingValues : 0.0, PercentageOfNumericFeatures : 30.434782608695656, PercentageOfSymbolicFeatures : 69.56521739130434, Quartile1AttributeEntropy : 1.1136241621098728, Quartile1KurtosisOfNumericAtts : -0.2058503160800167, Quartile1MeansOfNumericAtts : 6.76783748091, Quartile1MutualInformation : 0.02066693266558, Quartile1SkewnessOfNumericAtts : 0.24687711995798886, Quartile1StdDevOfNumericAtts : 1.308402984602055, Quartile2AttributeEntropy : 1.413232317341817, Quartile2KurtosisOfNumericAtts : 0.39742215707381945, Quartile2MeansOfNumericAtts : 35.145741562823, Quartile2MutualInformation : 0.0324021318614, Quartile2SkewnessOfNumericAtts : 0.8341479430006891, Quartile2StdDevOfNumericAtts : 10.372094601487213, Quartile3AttributeEntropy : 1.6585838471925973, Quartile3KurtosisOfNumericAtts : 5.635942611844879, Quartile3MeansOfNumericAtts : 45.445245627533, Quartile3MutualInformation : 0.07378886654644, Quartile3SkewnessOfNumericAtts : 0.9210740111176681, Quartile3StdDevOfNumericAtts : 27.25328161172137, REPTreeDepth1AUC : 0.938486918822373, REPTreeDepth1ErrRate : 0.111603, REPTreeDepth1Kappa : 0.7585901803733961, REPTreeDepth2AUC : 0.938486918822373, REPTreeDepth2ErrRate : 0.111603, REPTreeDepth2Kappa : 0.7585901803733961, REPTreeDepth3AUC : 0.938486918822373, REPTreeDepth3ErrRate : 0.111603, REPTreeDepth3Kappa : 0.7585901803733961, RandomTreeDepth1AUC : 0.8392734962743577, RandomTreeDepth1ErrRate : 0.153177, RandomTreeDepth1Kappa : 0.6712975370465432, RandomTreeDepth2AUC : 0.8392734962743577, RandomTreeDepth2ErrRate : 0.153177, RandomTreeDepth2Kappa : 0.6712975370465432, RandomTreeDepth3AUC : 0.8392734962743577, RandomTreeDepth3ErrRate : 0.153177, RandomTreeDepth3Kappa : 0.6712975370465432, StdvNominalAttDistinctValues : 1.1528949070347507, kNN1NAUC : 0.8431578483210511, kNN1NErrRate : 0.14619, kNN1NKappa : 0.686335639427079,',\n", + " 'status': 'active',\n", + " 'uploader': 1,\n", + " 'version': 1},\n", + " {'MajorityClassSize': 554008.0,\n", + " 'MaxNominalAttDistinctValues': 14.0,\n", + " 'MinorityClassSize': 445992.0,\n", + " 'NumberOfClasses': 2.0,\n", + " 'NumberOfFeatures': 16.0,\n", + " 'NumberOfInstances': 1000000.0,\n", + " 'NumberOfInstancesWithMissingValues': 0.0,\n", + " 'NumberOfMissingValues': 0.0,\n", + " 'NumberOfNumericFeatures': 6.0,\n", + " 'NumberOfSymbolicFeatures': 10.0,\n", + " 'Unnamed: 0': 179,\n", + " 'did': 258,\n", + " 'features': '0 : [0 - A1 (nominal)], 1 : [1 - A2 (numeric)], 2 : [2 - A3 (numeric)], 3 : [3 - A4 (nominal)], 4 : [4 - A5 (nominal)], 5 : [5 - A6 (nominal)], 6 : [6 - A7 (nominal)], 7 : [7 - A8 (numeric)], 8 : [8 - A9 (nominal)], 9 : [9 - A10 (nominal)], 10 : [10 - A11 (numeric)], 11 : [11 - A12 (nominal)], 12 : [12 - A13 (nominal)], 13 : [13 - A14 (numeric)], 14 : [14 - A15 (numeric)], 15 : [15 - class (nominal)],',\n", + " 'format': 'ARFF',\n", + " 'name': 'BNG(credit-a)',\n", + " 'qualities': 'AutoCorrelation : 0.5055695055695055, CfsSubsetEval_DecisionStumpAUC : 0.9236428160342482, CfsSubsetEval_DecisionStumpErrRate : 0.123302, CfsSubsetEval_DecisionStumpKappa : 0.7500795603564689, CfsSubsetEval_NaiveBayesAUC : 0.9236428160342482, CfsSubsetEval_NaiveBayesErrRate : 0.123302, CfsSubsetEval_NaiveBayesKappa : 0.7500795603564689, CfsSubsetEval_kNN1NAUC : 0.9236428160342482, CfsSubsetEval_kNN1NErrRate : 0.123302, CfsSubsetEval_kNN1NKappa : 0.7500795603564689, ClassEntropy : 0.9915672663814072, DecisionStumpAUC : 0.8541206799506694, DecisionStumpErrRate : 0.151383, DecisionStumpKappa : 0.6982638524973298, Dimensionality : 1.6e-05, EquivalentNumberOfAtts : 11.804035448329977, J48.00001.AUC : 0.9264494052977083, J48.00001.ErrRate : 0.109414, J48.00001.Kappa : 0.778334125212909, J48.0001.AUC : 0.9264494052977083, J48.0001.ErrRate : 0.109414, J48.0001.Kappa : 0.778334125212909, J48.001.AUC : 0.9264494052977083, J48.001.ErrRate : 0.109414, J48.001.Kappa : 0.778334125212909, MajorityClassPercentage : 55.4008, MajorityClassSize : 554008.0, MaxAttributeEntropy : 3.5228699067646363, MaxKurtosisOfNumericAtts : 16.195299364951175, MaxMeansOfNumericAtts : 1041.64972134852, MaxMutualInformation : 0.40292015145978, MaxNominalAttDistinctValues : 14.0, MaxSkewnessOfNumericAtts : 2.8686822783720123, MaxStdDevOfNumericAtts : 5269.332591656998, MeanAttributeEntropy : 1.3083589383538596, MeanKurtosisOfNumericAtts : 5.547874049739754, MeanMeansOfNumericAtts : 210.67665656673768, MeanMutualInformation : 0.08400239652971334, MeanNoiseToSignalRatio : 14.575257283178424, MeanNominalAttDistinctValues : 4.3, MeanSkewnessOfNumericAtts : 1.9186529372632175, MeanStdDevOfNumericAtts : 911.1448879076281, MinAttributeEntropy : 0.5558554785178321, MinKurtosisOfNumericAtts : 0.5691563826713018, MinMeansOfNumericAtts : 2.197265006407, MinMutualInformation : 0.00053302906036, MinNominalAttDistinctValues : 2.0, MinSkewnessOfNumericAtts : 1.0770283528918647, MinStdDevOfNumericAtts : 3.3431881209706527, MinorityClassPercentage : 44.5992, MinorityClassSize : 445992.0, NaiveBayesAUC : 0.9079508400354774, NaiveBayesErrRate : 0.16913, NaiveBayesKappa : 0.6525505293390783, NumberOfBinaryFeatures : 5.0, NumberOfClasses : 2.0, NumberOfFeatures : 16.0, NumberOfInstances : 1000000.0, NumberOfInstancesWithMissingValues : 0.0, NumberOfMissingValues : 0.0, NumberOfNumericFeatures : 6.0, NumberOfSymbolicFeatures : 10.0, PercentageOfBinaryFeatures : 31.25, PercentageOfInstancesWithMissingValues : 0.0, PercentageOfMissingValues : 0.0, PercentageOfNumericFeatures : 37.5, PercentageOfSymbolicFeatures : 62.5, Quartile1AttributeEntropy : 0.8411127085612495, Quartile1KurtosisOfNumericAtts : 0.8640729637588276, Quartile1MeansOfNumericAtts : 2.3588195008915056, Quartile1MutualInformation : 0.0050729212277349995, Quartile1SkewnessOfNumericAtts : 1.267997504852106, Quartile1StdDevOfNumericAtts : 4.499114428848817, Quartile2AttributeEntropy : 0.9835837346010516, Quartile2KurtosisOfNumericAtts : 3.2235730941015897, Quartile2MeansOfNumericAtts : 18.040640760645502, Quartile2MutualInformation : 0.02799897137278, Quartile2SkewnessOfNumericAtts : 1.7803984198040648, Quartile2StdDevOfNumericAtts : 8.460795390952773, Quartile3AttributeEntropy : 1.5751281782620663, Quartile3KurtosisOfNumericAtts : 10.883772745106443, Quartile3MeansOfNumericAtts : 396.70168073099626, Quartile3MutualInformation : 0.1241908979419, Quartile3SkewnessOfNumericAtts : 2.722487767494337, Quartile3StdDevOfNumericAtts : 1446.623798180064, REPTreeDepth1AUC : 0.9456538530396582, REPTreeDepth1ErrRate : 0.11094, REPTreeDepth1Kappa : 0.7753240635982098, REPTreeDepth2AUC : 0.9456538530396582, REPTreeDepth2ErrRate : 0.11094, REPTreeDepth2Kappa : 0.7753240635982098, REPTreeDepth3AUC : 0.9456538530396582, REPTreeDepth3ErrRate : 0.11094, REPTreeDepth3Kappa : 0.7753240635982098, RandomTreeDepth1AUC : 0.8511524014025537, RandomTreeDepth1ErrRate : 0.147639, RandomTreeDepth1Kappa : 0.7011429677010672, RandomTreeDepth2AUC : 0.8511524014025537, RandomTreeDepth2ErrRate : 0.147639, RandomTreeDepth2Kappa : 0.7011429677010672, RandomTreeDepth3AUC : 0.8511524014025537, RandomTreeDepth3ErrRate : 0.147639, RandomTreeDepth3Kappa : 0.7011429677010672, StdvNominalAttDistinctValues : 4.029061098237817, kNN1NAUC : 0.8411233385747212, kNN1NErrRate : 0.155599, kNN1NKappa : 0.6842183003873852,',\n", + " 'status': 'active',\n", + " 'uploader': 1,\n", + " 'version': 1},\n", + " {'MajorityClassSize': 44.0,\n", + " 'MaxNominalAttDistinctValues': 54.0,\n", + " 'MinorityClassSize': 10.0,\n", + " 'NumberOfClasses': 6.0,\n", + " 'NumberOfFeatures': 12.0,\n", + " 'NumberOfInstances': 105.0,\n", + " 'NumberOfInstancesWithMissingValues': 35.0,\n", + " 'NumberOfMissingValues': 61.0,\n", + " 'NumberOfNumericFeatures': 0.0,\n", + " 'NumberOfSymbolicFeatures': 12.0,\n", + " 'Unnamed: 0': 218,\n", + " 'description': '**Author**: Yoram Reich\",\"Steven J. Fenves \\n \\n**Source**: [original](http://openml.org/d/19) - \\n**Please cite**: \\n\\nPittsburgh bridges \\n\\nThis version is derived from version 2 (the discretized version) by removing all instances with missing values in the last (target) attribute. The bridges dataset is originally not a classification dataset, put is used so extensively in the literature, using the last attribute as the target attribute. However, this attribute has missing values, which may lead to confusing benchmarking result. Therefore, these instances have been removed. \\n\\nSources: \\n-- Yoram Reich and Steven J. Fenves Department of Civil Engineering and Engineering Design Research Center Carnegie Mellon University Pittsburgh, PA 15213 Compiled from various sources. \\n-- Date: 1 August 1990 \\n\\nAttribute Information: The type field state whether a property is continuous/integer (c) or nominal (n). For properties with c,n type, the range of continuous numbers is given first and the possible values of the nominal follow the semi-colon. \\n\\nname type possible values comments \\n------------------------------------------------------------------------ \\n1. IDENTIF - - identifier of the examples \\n2. RIVER n A, M, O \\n3. LOCATION n 1 to 52 \\n4. ERECTED c,n 1818-1986 - CRAFTS, EMERGING, MATURE, MODERN \\n5. PURPOSE n WALK, AQUEDUCT, RR, HIGHWAY \\n6. LENGTH c,n 804-4558 - SHORT, MEDIUM, LONG \\n7. LANES c,n 1, 2, 4, 6 - 1, 2, 4, 6 \\n8. CLEAR-G n N, G \\n9. T-OR-D n THROUGH, DECK \\n10. MATERIAL n WOOD, IRON, STEEL \\n11. SPAN n SHORT, MEDIUM, LONG \\n12. REL-L n S, S-F, F \\n13. TYPE n WOOD, SUSPEN, SIMPLE-T, ARCH, CANTILEV, CONT-T',\n", + " 'did': 328,\n", + " 'features': '0 : [0 - IDENTIF (nominal)], 1 : [1 - RIVER (nominal)], 2 : [2 - LOCATION (nominal)], 3 : [3 - ERECTED (nominal)], 4 : [4 - PURPOSE (nominal)], 5 : [5 - LENGTH (nominal)], 6 : [6 - LANES (nominal)], 7 : [7 - CLEAR-G (nominal)], 8 : [8 - T-OR-D (nominal)], 9 : [9 - MATERIAL (nominal)], 10 : [10 - SPAN (nominal)], 11 : [11 - REL-L (nominal)], 12 : [12 - TYPE (nominal)],',\n", + " 'format': 'ARFF',\n", + " 'name': 'bridges',\n", + " 'qualities': 'AutoCorrelation : 0.49038461538461536, CfsSubsetEval_DecisionStumpAUC : 0.6873916110699549, CfsSubsetEval_DecisionStumpErrRate : 0.4380952380952381, CfsSubsetEval_DecisionStumpKappa : 0.2985768225384839, CfsSubsetEval_NaiveBayesAUC : 0.6873916110699549, CfsSubsetEval_NaiveBayesErrRate : 0.4380952380952381, CfsSubsetEval_NaiveBayesKappa : 0.2985768225384839, CfsSubsetEval_kNN1NAUC : 0.6873916110699549, CfsSubsetEval_kNN1NErrRate : 0.4380952380952381, CfsSubsetEval_kNN1NKappa : 0.2985768225384839, ClassEntropy : 2.317602811811176, DecisionStumpAUC : 0.6593219297314425, DecisionStumpErrRate : 0.42857142857142855, DecisionStumpKappa : 0.3105209397344228, Dimensionality : 0.11428571428571428, EquivalentNumberOfAtts : 4.8158526820324905, J48.00001.AUC : 0.79171068395272, J48.00001.ErrRate : 0.3904761904761905, J48.00001.Kappa : 0.43607545192559616, J48.0001.AUC : 0.79171068395272, J48.0001.ErrRate : 0.3904761904761905, J48.0001.Kappa : 0.43607545192559616, J48.001.AUC : 0.79171068395272, J48.001.ErrRate : 0.3904761904761905, J48.001.Kappa : 0.43607545192559616, MajorityClassPercentage : 41.904761904761905, MajorityClassSize : 44.0, MaxAttributeEntropy : 5.578205398474268, MaxKurtosisOfNumericAtts : nan, MaxMeansOfNumericAtts : nan, MaxMutualInformation : 1.55784626408911, MaxNominalAttDistinctValues : 54.0, MaxSkewnessOfNumericAtts : nan, MaxStdDevOfNumericAtts : nan, MeanAttributeEntropy : 1.6872699252760597, MeanKurtosisOfNumericAtts : nan, MeanMeansOfNumericAtts : nan, MeanMutualInformation : 0.48124454065174, MeanNoiseToSignalRatio : 2.506055202186862, MeanNominalAttDistinctValues : 7.666666666666667, MeanSkewnessOfNumericAtts : nan, MeanStdDevOfNumericAtts : nan, MinAttributeEntropy : 0.5916727785823275, MinKurtosisOfNumericAtts : nan, MinMeansOfNumericAtts : nan, MinMutualInformation : 0.16891819380606, MinNominalAttDistinctValues : 2.0, MinSkewnessOfNumericAtts : nan, MinStdDevOfNumericAtts : nan, MinorityClassPercentage : 9.523809523809524, MinorityClassSize : 10.0, NaiveBayesAUC : 0.8578793089705496, NaiveBayesErrRate : 0.3238095238095238, NaiveBayesKappa : 0.5645810464690816, NumberOfBinaryFeatures : 2.0, NumberOfClasses : 6.0, NumberOfFeatures : 12.0, NumberOfInstances : 105.0, NumberOfInstancesWithMissingValues : 35.0, NumberOfMissingValues : 61.0, NumberOfNumericFeatures : 0.0, NumberOfSymbolicFeatures : 12.0, PercentageOfBinaryFeatures : 16.666666666666664, PercentageOfInstancesWithMissingValues : 33.33333333333333, PercentageOfMissingValues : 4.841269841269842, PercentageOfNumericFeatures : 0.0, PercentageOfSymbolicFeatures : 100.0, Quartile1AttributeEntropy : 1.0731491283683547, Quartile1KurtosisOfNumericAtts : nan, Quartile1MeansOfNumericAtts : nan, Quartile1MutualInformation : 0.23751139345904, Quartile1SkewnessOfNumericAtts : nan, Quartile1StdDevOfNumericAtts : nan, Quartile2AttributeEntropy : 1.4866836360458784, Quartile2KurtosisOfNumericAtts : nan, Quartile2MeansOfNumericAtts : nan, Quartile2MutualInformation : 0.36240410933397, Quartile2SkewnessOfNumericAtts : nan, Quartile2StdDevOfNumericAtts : nan, Quartile3AttributeEntropy : 1.5829772831026911, Quartile3KurtosisOfNumericAtts : nan, Quartile3MeansOfNumericAtts : nan, Quartile3MutualInformation : 0.60363739984227, Quartile3SkewnessOfNumericAtts : nan, Quartile3StdDevOfNumericAtts : nan, REPTreeDepth1AUC : 0.5279250475484106, REPTreeDepth1ErrRate : 0.6190476190476191, REPTreeDepth1Kappa : 0.02262637834741511, REPTreeDepth2AUC : 0.5279250475484106, REPTreeDepth2ErrRate : 0.6190476190476191, REPTreeDepth2Kappa : 0.02262637834741511, REPTreeDepth3AUC : 0.5279250475484106, REPTreeDepth3ErrRate : 0.6190476190476191, REPTreeDepth3Kappa : 0.02262637834741511, RandomTreeDepth1AUC : 0.7698173171361247, RandomTreeDepth1ErrRate : 0.42857142857142855, RandomTreeDepth1Kappa : 0.37582562747688236, RandomTreeDepth2AUC : 0.7698173171361247, RandomTreeDepth2ErrRate : 0.42857142857142855, RandomTreeDepth2Kappa : 0.37582562747688236, RandomTreeDepth3AUC : 0.7698173171361247, RandomTreeDepth3ErrRate : 0.42857142857142855, RandomTreeDepth3Kappa : 0.37582562747688236, StdvNominalAttDistinctValues : 14.63081016419139, kNN1NAUC : 0.7781376695997415, kNN1NErrRate : 0.3904761904761905, kNN1NKappa : 0.47881355932203395,',\n", + " 'status': 'active',\n", + " 'uploader': 2,\n", + " 'version': 4},\n", + " {'MaxNominalAttDistinctValues': 2.0,\n", + " 'NumberOfClasses': 0.0,\n", + " 'NumberOfFeatures': 10.0,\n", + " 'NumberOfInstances': 159.0,\n", + " 'NumberOfInstancesWithMissingValues': 6.0,\n", + " 'NumberOfMissingValues': 6.0,\n", + " 'NumberOfNumericFeatures': 5.0,\n", + " 'NumberOfSymbolicFeatures': 5.0,\n", + " 'Unnamed: 0': 363,\n", + " 'description': '**Author**: \\n**Source**: Unknown - Date unknown \\n**Please cite**: \\n\\nanalcatdata A collection of data sets used in the book \"Analyzing Categorical Data,\"\\nby Jeffrey S. Simonoff, Springer-Verlag, New York, 2003. The submission\\nconsists of a zip file containing two versions of each of 84 data sets,\\nplus this README file. Each data set is given in comma-delimited ASCII\\n(.csv) form, and Microsoft Excel (.xls) form.\\n\\nNOTICE: These data sets may be used freely for scientific, educational and/or\\nnoncommercial purposes, provided suitable acknowledgment is given (by citing\\nthe above-named reference).\\n\\nFurther details concerning the book, including information on statistical software\\n(including sample S-PLUS/R and SAS code), are available at the web site\\n\\nhttp://www.stern.nyu.edu/~jsimonof/AnalCatData\\n\\n\\nInformation about the dataset\\nCLASSTYPE: numeric\\nCLASSINDEX: last\\n\\n\\nNote: Quotes, Single-Quotes and Backslashes were removed, Blanks replaced\\nwith Underscores',\n", + " 'did': 506,\n", + " 'features': '0 : [0 - Married (nominal)], 1 : [1 - Age (numeric)], 2 : [2 - Years_of_education (numeric)], 3 : [3 - Male (nominal)], 4 : [4 - Religious (nominal)], 5 : [5 - Sex_partners (numeric)], 6 : [6 - Income (numeric)], 7 : [7 - Drug_use (nominal)], 8 : [8 - Same_sex_relations (nominal)], 9 : [9 - AIDS_know (numeric)],',\n", + " 'format': 'ARFF',\n", + " 'name': 'analcatdata_gsssexsurvey',\n", + " 'qualities': 'AutoCorrelation : 0.47468354430379744, CfsSubsetEval_DecisionStumpAUC : nan, CfsSubsetEval_DecisionStumpErrRate : nan, CfsSubsetEval_DecisionStumpKappa : nan, CfsSubsetEval_NaiveBayesAUC : nan, CfsSubsetEval_NaiveBayesErrRate : nan, CfsSubsetEval_NaiveBayesKappa : nan, CfsSubsetEval_kNN1NAUC : nan, CfsSubsetEval_kNN1NErrRate : nan, CfsSubsetEval_kNN1NKappa : nan, ClassEntropy : nan, DecisionStumpAUC : nan, DecisionStumpErrRate : nan, DecisionStumpKappa : nan, Dimensionality : 0.06289308176100629, EquivalentNumberOfAtts : nan, J48.00001.AUC : nan, J48.00001.ErrRate : nan, J48.00001.Kappa : nan, J48.0001.AUC : nan, J48.0001.ErrRate : nan, J48.0001.Kappa : nan, J48.001.AUC : nan, J48.001.ErrRate : nan, J48.001.Kappa : nan, MajorityClassPercentage : nan, MajorityClassSize : nan, MaxAttributeEntropy : nan, MaxKurtosisOfNumericAtts : 8.886923444813059, MaxMeansOfNumericAtts : 24555.55555555555, MaxMutualInformation : nan, MaxNominalAttDistinctValues : 2.0, MaxSkewnessOfNumericAtts : 2.9101190601750293, MaxStdDevOfNumericAtts : 17224.50495388568, MeanAttributeEntropy : nan, MeanKurtosisOfNumericAtts : 3.9091211561999764, MeanMeansOfNumericAtts : 4922.47966457023, MeanMutualInformation : nan, MeanNoiseToSignalRatio : nan, MeanNominalAttDistinctValues : 2.0, MeanSkewnessOfNumericAtts : 1.5446898401998712, MeanStdDevOfNumericAtts : 3448.5649830178054, MinAttributeEntropy : nan, MinKurtosisOfNumericAtts : 0.5279947743546121, MinMeansOfNumericAtts : 0.3396226415094337, MinMutualInformation : nan, MinNominalAttDistinctValues : 2.0, MinSkewnessOfNumericAtts : 0.10326629350803894, MinStdDevOfNumericAtts : 0.7533065397135874, MinorityClassPercentage : nan, MinorityClassSize : nan, NaiveBayesAUC : nan, NaiveBayesErrRate : nan, NaiveBayesKappa : nan, NumberOfBinaryFeatures : 5.0, NumberOfClasses : 0.0, NumberOfFeatures : 10.0, NumberOfInstances : 159.0, NumberOfInstancesWithMissingValues : 6.0, NumberOfMissingValues : 6.0, NumberOfNumericFeatures : 5.0, NumberOfSymbolicFeatures : 5.0, PercentageOfBinaryFeatures : 50.0, PercentageOfInstancesWithMissingValues : 3.7735849056603774, PercentageOfMissingValues : 0.37735849056603776, PercentageOfNumericFeatures : 50.0, PercentageOfSymbolicFeatures : 50.0, Quartile1AttributeEntropy : nan, Quartile1KurtosisOfNumericAtts : 0.5376383551039363, Quartile1MeansOfNumericAtts : 1.5251572327044023, Quartile1MutualInformation : nan, Quartile1SkewnessOfNumericAtts : 0.4995955249738286, Quartile1StdDevOfNumericAtts : 1.7327600338286375, Quartile2AttributeEntropy : nan, Quartile2KurtosisOfNumericAtts : 1.7507886384047597, Quartile2MeansOfNumericAtts : 13.716981132075471, Quartile2MutualInformation : nan, Quartile2SkewnessOfNumericAtts : 1.1388718940504805, Quartile2StdDevOfNumericAtts : 3.7893019161873753, Quartile3AttributeEntropy : nan, Quartile3KurtosisOfNumericAtts : 8.359770216193624, Quartile3MeansOfNumericAtts : 12297.815513626832, Quartile3MutualInformation : nan, Quartile3SkewnessOfNumericAtts : 2.792693128500609, Quartile3StdDevOfNumericAtts : 8617.785046552592, REPTreeDepth1AUC : nan, REPTreeDepth1ErrRate : nan, REPTreeDepth1Kappa : nan, REPTreeDepth2AUC : nan, REPTreeDepth2ErrRate : nan, REPTreeDepth2Kappa : nan, REPTreeDepth3AUC : nan, REPTreeDepth3ErrRate : nan, REPTreeDepth3Kappa : nan, RandomTreeDepth1AUC : nan, RandomTreeDepth1ErrRate : nan, RandomTreeDepth1Kappa : nan, RandomTreeDepth2AUC : nan, RandomTreeDepth2ErrRate : nan, RandomTreeDepth2Kappa : nan, RandomTreeDepth3AUC : nan, RandomTreeDepth3ErrRate : nan, RandomTreeDepth3Kappa : nan, StdvNominalAttDistinctValues : 0.0, kNN1NAUC : nan, kNN1NErrRate : nan, kNN1NKappa : nan,',\n", + " 'status': 'active',\n", + " 'uploader': 2,\n", + " 'version': 1},\n", + " {'NumberOfClasses': 0.0,\n", + " 'NumberOfFeatures': 11.0,\n", + " 'NumberOfInstances': 1000.0,\n", + " 'NumberOfInstancesWithMissingValues': 0.0,\n", + " 'NumberOfMissingValues': 0.0,\n", + " 'NumberOfNumericFeatures': 11.0,\n", + " 'NumberOfSymbolicFeatures': 0.0,\n", + " 'Unnamed: 0': 476,\n", + " 'description': '**Author**: \\n**Source**: Unknown - Date unknown \\n**Please cite**: \\n\\nThe Friedman datasets are 80 artificially generated datasets originating from: J.H. Friedman (1999). Stochastic Gradient Boosting\\n\\nThe dataset names are coded as \"fri_colinearintydegree_samplenumber_featurenumber\".\\n\\nFriedman is the one of the most used functions for data generation (Friedman, 1999). Friedman functions include both linear and non-linear relations between input and output, and a normalized noise (e) is added to the output. The Friedman function is as follows: \\n\\ny=10*sin(pi*x1*x2)+20*(x3-0.5)^2=10*X4+5*X5+e\\n\\nIn the original Friedman function, there are 5 features for input. To measure the effects of non-related features, additional features are added to the datasets. These added features are independent from the output. However, to measure the algorithm\\'s robustness to the colinearity, the datasets are generated with 5 different colinearity degrees. The colinearity degrees is the number of features depending on other features. \\n\\nThe generated Friedman dataset\\'s parameters and values are given below: \\nThe number of features: 5 10 25 50 100 (only the first 5 features are related to the output. The rest are completely random)\\nThe number of samples: 100 250 500 1000\\nColinearity degrees: 0 1 2 3 4\\nFor the datasets with colinearity degree 4, the numbers of features are generated as 10, 25, 50 and 100.\\nThe other datasets have 5, 10, 25 and 50 features.\\n\\nAs a result, 80 artificial datasets are generated by (4 different feature number * 4 different sample number * 5 different colinearity degree)\\n\\nThe last attribute in each file is the target.',\n", + " 'did': 623,\n", + " 'features': '0 : [0 - oz1 (numeric)], 1 : [1 - oz2 (numeric)], 2 : [2 - oz3 (numeric)], 3 : [3 - oz4 (numeric)], 4 : [4 - oz5 (numeric)], 5 : [5 - oz6 (numeric)], 6 : [6 - oz7 (numeric)], 7 : [7 - oz8 (numeric)], 8 : [8 - oz9 (numeric)], 9 : [9 - oz10 (numeric)], 10 : [10 - oz11 (numeric)],',\n", + " 'format': 'ARFF',\n", + " 'name': 'fri_c4_1000_10',\n", + " 'qualities': 'AutoCorrelation : -0.1431321730294305, CfsSubsetEval_DecisionStumpAUC : nan, CfsSubsetEval_DecisionStumpErrRate : nan, CfsSubsetEval_DecisionStumpKappa : nan, CfsSubsetEval_NaiveBayesAUC : nan, CfsSubsetEval_NaiveBayesErrRate : nan, CfsSubsetEval_NaiveBayesKappa : nan, CfsSubsetEval_kNN1NAUC : nan, CfsSubsetEval_kNN1NErrRate : nan, CfsSubsetEval_kNN1NKappa : nan, ClassEntropy : nan, DecisionStumpAUC : nan, DecisionStumpErrRate : nan, DecisionStumpKappa : nan, Dimensionality : 0.011, EquivalentNumberOfAtts : nan, J48.00001.AUC : nan, J48.00001.ErrRate : nan, J48.00001.Kappa : nan, J48.0001.AUC : nan, J48.0001.ErrRate : nan, J48.0001.Kappa : nan, J48.001.AUC : nan, J48.001.ErrRate : nan, J48.001.Kappa : nan, MajorityClassPercentage : nan, MajorityClassSize : nan, MaxAttributeEntropy : nan, MaxKurtosisOfNumericAtts : 1.9053017190944588, MaxMeansOfNumericAtts : 1.1514999891104605e-09, MaxMutualInformation : nan, MaxNominalAttDistinctValues : nan, MaxSkewnessOfNumericAtts : 1.4130003640032112, MaxStdDevOfNumericAtts : 1.0000000006680836, MeanAttributeEntropy : nan, MeanKurtosisOfNumericAtts : -0.5092063183629136, MeanMeansOfNumericAtts : 5.750025164341646e-11, MeanMutualInformation : nan, MeanNoiseToSignalRatio : nan, MeanNominalAttDistinctValues : nan, MeanSkewnessOfNumericAtts : 0.26265382761299244, MeanStdDevOfNumericAtts : 0.9999999994973643, MinAttributeEntropy : nan, MinKurtosisOfNumericAtts : -1.252232783383241, MinMeansOfNumericAtts : -8.97900024332543e-10, MinMutualInformation : nan, MinNominalAttDistinctValues : nan, MinSkewnessOfNumericAtts : -0.17282355890184273, MinStdDevOfNumericAtts : 0.9999999983660289, MinorityClassPercentage : nan, MinorityClassSize : nan, NaiveBayesAUC : nan, NaiveBayesErrRate : nan, NaiveBayesKappa : nan, NumberOfBinaryFeatures : 0.0, NumberOfClasses : 0.0, NumberOfFeatures : 11.0, NumberOfInstances : 1000.0, NumberOfInstancesWithMissingValues : 0.0, NumberOfMissingValues : 0.0, NumberOfNumericFeatures : 11.0, NumberOfSymbolicFeatures : 0.0, PercentageOfBinaryFeatures : 0.0, PercentageOfInstancesWithMissingValues : 0.0, PercentageOfMissingValues : 0.0, PercentageOfNumericFeatures : 100.0, PercentageOfSymbolicFeatures : 0.0, Quartile1AttributeEntropy : nan, Quartile1KurtosisOfNumericAtts : -1.2467638014696125, Quartile1MeansOfNumericAtts : -3.7560000887282287e-10, Quartile1MutualInformation : nan, Quartile1SkewnessOfNumericAtts : -0.01865601158096305, Quartile1StdDevOfNumericAtts : 0.9999999988117013, Quartile2AttributeEntropy : nan, Quartile2KurtosisOfNumericAtts : -1.2102392844900602, Quartile2MeansOfNumericAtts : 6.673004537827155e-11, Quartile2MutualInformation : nan, Quartile2SkewnessOfNumericAtts : 0.024246496361142147, Quartile2StdDevOfNumericAtts : 0.9999999994149671, Quartile3AttributeEntropy : nan, Quartile3KurtosisOfNumericAtts : 0.17473277546107502, Quartile3MeansOfNumericAtts : 3.340999302769987e-10, Quartile3MutualInformation : nan, Quartile3SkewnessOfNumericAtts : 0.7497638661886735, Quartile3StdDevOfNumericAtts : 1.0000000002006977, REPTreeDepth1AUC : nan, REPTreeDepth1ErrRate : nan, REPTreeDepth1Kappa : nan, REPTreeDepth2AUC : nan, REPTreeDepth2ErrRate : nan, REPTreeDepth2Kappa : nan, REPTreeDepth3AUC : nan, REPTreeDepth3ErrRate : nan, REPTreeDepth3Kappa : nan, RandomTreeDepth1AUC : nan, RandomTreeDepth1ErrRate : nan, RandomTreeDepth1Kappa : nan, RandomTreeDepth2AUC : nan, RandomTreeDepth2ErrRate : nan, RandomTreeDepth2Kappa : nan, RandomTreeDepth3AUC : nan, RandomTreeDepth3ErrRate : nan, RandomTreeDepth3Kappa : nan, StdvNominalAttDistinctValues : nan, kNN1NAUC : nan, kNN1NErrRate : nan, kNN1NKappa : nan,',\n", + " 'status': 'active',\n", + " 'uploader': 2,\n", + " 'version': 1},\n", + " {'NumberOfClasses': 0.0,\n", + " 'NumberOfFeatures': 6.0,\n", + " 'NumberOfInstances': 100.0,\n", + " 'NumberOfInstancesWithMissingValues': 0.0,\n", + " 'NumberOfMissingValues': 0.0,\n", + " 'NumberOfNumericFeatures': 6.0,\n", + " 'NumberOfSymbolicFeatures': 0.0,\n", + " 'Unnamed: 0': 477,\n", + " 'description': '**Author**: \\n**Source**: Unknown - Date unknown \\n**Please cite**: \\n\\nThe Friedman datasets are 80 artificially generated datasets originating from: J.H. Friedman (1999). Stochastic Gradient Boosting\\n\\nThe dataset names are coded as \"fri_colinearintydegree_samplenumber_featurenumber\".\\n\\nFriedman is the one of the most used functions for data generation (Friedman, 1999). Friedman functions include both linear and non-linear relations between input and output, and a normalized noise (e) is added to the output. The Friedman function is as follows: \\n\\ny=10*sin(pi*x1*x2)+20*(x3-0.5)^2=10*X4+5*X5+e\\n\\nIn the original Friedman function, there are 5 features for input. To measure the effects of non-related features, additional features are added to the datasets. These added features are independent from the output. However, to measure the algorithm\\'s robustness to the colinearity, the datasets are generated with 5 different colinearity degrees. The colinearity degrees is the number of features depending on other features. \\n\\nThe generated Friedman dataset\\'s parameters and values are given below: \\nThe number of features: 5 10 25 50 100 (only the first 5 features are related to the output. The rest are completely random)\\nThe number of samples: 100 250 500 1000\\nColinearity degrees: 0 1 2 3 4\\nFor the datasets with colinearity degree 4, the numbers of features are generated as 10, 25, 50 and 100.\\nThe other datasets have 5, 10, 25 and 50 features.\\n\\nAs a result, 80 artificial datasets are generated by (4 different feature number * 4 different sample number * 5 different colinearity degree)\\n\\nThe last attribute in each file is the target.',\n", + " 'did': 624,\n", + " 'features': '0 : [0 - oz1 (numeric)], 1 : [1 - oz2 (numeric)], 2 : [2 - oz3 (numeric)], 3 : [3 - oz4 (numeric)], 4 : [4 - oz5 (numeric)], 5 : [5 - oz6 (numeric)],',\n", + " 'format': 'ARFF',\n", + " 'name': 'fri_c0_100_5',\n", + " 'qualities': 'AutoCorrelation : -0.007891897050505388, CfsSubsetEval_DecisionStumpAUC : nan, CfsSubsetEval_DecisionStumpErrRate : nan, CfsSubsetEval_DecisionStumpKappa : nan, CfsSubsetEval_NaiveBayesAUC : nan, CfsSubsetEval_NaiveBayesErrRate : nan, CfsSubsetEval_NaiveBayesKappa : nan, CfsSubsetEval_kNN1NAUC : nan, CfsSubsetEval_kNN1NErrRate : nan, CfsSubsetEval_kNN1NKappa : nan, ClassEntropy : nan, DecisionStumpAUC : nan, DecisionStumpErrRate : nan, DecisionStumpKappa : nan, Dimensionality : 0.06, EquivalentNumberOfAtts : nan, J48.00001.AUC : nan, J48.00001.ErrRate : nan, J48.00001.Kappa : nan, J48.0001.AUC : nan, J48.0001.ErrRate : nan, J48.0001.Kappa : nan, J48.001.AUC : nan, J48.001.ErrRate : nan, J48.001.Kappa : nan, MajorityClassPercentage : nan, MajorityClassSize : nan, MaxAttributeEntropy : nan, MaxKurtosisOfNumericAtts : -0.5528786452668188, MaxMeansOfNumericAtts : 1.2699999930865235e-09, MaxMutualInformation : nan, MaxNominalAttDistinctValues : nan, MaxSkewnessOfNumericAtts : 0.354942254726109, MaxStdDevOfNumericAtts : 1.0000000038649643, MeanAttributeEntropy : nan, MeanKurtosisOfNumericAtts : -0.9807632643793867, MeanMeansOfNumericAtts : -7.253333315221377e-10, MeanMutualInformation : nan, MeanNoiseToSignalRatio : nan, MeanNominalAttDistinctValues : nan, MeanSkewnessOfNumericAtts : 0.0024185977273379184, MeanStdDevOfNumericAtts : 1.0000000006047896, MinAttributeEntropy : nan, MinKurtosisOfNumericAtts : -1.3282931476128914, MinMeansOfNumericAtts : -3.340000052087788e-09, MinMutualInformation : nan, MinNominalAttDistinctValues : nan, MinSkewnessOfNumericAtts : -0.133307630216264, MinStdDevOfNumericAtts : 0.9999999966147466, MinorityClassPercentage : nan, MinorityClassSize : nan, NaiveBayesAUC : nan, NaiveBayesErrRate : nan, NaiveBayesKappa : nan, NumberOfBinaryFeatures : 0.0, NumberOfClasses : 0.0, NumberOfFeatures : 6.0, NumberOfInstances : 100.0, NumberOfInstancesWithMissingValues : 0.0, NumberOfMissingValues : 0.0, NumberOfNumericFeatures : 6.0, NumberOfSymbolicFeatures : 0.0, PercentageOfBinaryFeatures : 0.0, PercentageOfInstancesWithMissingValues : 0.0, PercentageOfMissingValues : 0.0, PercentageOfNumericFeatures : 100.0, PercentageOfSymbolicFeatures : 0.0, Quartile1AttributeEntropy : nan, Quartile1KurtosisOfNumericAtts : -1.1854586273829244, Quartile1MeansOfNumericAtts : -2.455000023826592e-09, Quartile1MutualInformation : nan, Quartile1SkewnessOfNumericAtts : -0.13020953311533182, Quartile1StdDevOfNumericAtts : 0.9999999982648644, Quartile2AttributeEntropy : nan, Quartile2KurtosisOfNumericAtts : -0.9788320801085673, Quartile2MeansOfNumericAtts : -3.8649997424755624e-10, Quartile2MutualInformation : nan, Quartile2SkewnessOfNumericAtts : -0.05905904653100631, Quartile2StdDevOfNumericAtts : 1.0000000012145431, Quartile3AttributeEntropy : nan, Quartile3KurtosisOfNumericAtts : -0.8191420457216096, Quartile3MeansOfNumericAtts : 8.057500228489389e-10, Quartile3MutualInformation : nan, Quartile3SkewnessOfNumericAtts : 0.1188644804299394, Quartile3StdDevOfNumericAtts : 1.0000000023950186, REPTreeDepth1AUC : nan, REPTreeDepth1ErrRate : nan, REPTreeDepth1Kappa : nan, REPTreeDepth2AUC : nan, REPTreeDepth2ErrRate : nan, REPTreeDepth2Kappa : nan, REPTreeDepth3AUC : nan, REPTreeDepth3ErrRate : nan, REPTreeDepth3Kappa : nan, RandomTreeDepth1AUC : nan, RandomTreeDepth1ErrRate : nan, RandomTreeDepth1Kappa : nan, RandomTreeDepth2AUC : nan, RandomTreeDepth2ErrRate : nan, RandomTreeDepth2Kappa : nan, RandomTreeDepth3AUC : nan, RandomTreeDepth3ErrRate : nan, RandomTreeDepth3Kappa : nan, StdvNominalAttDistinctValues : nan, kNN1NAUC : nan, kNN1NErrRate : nan, kNN1NKappa : nan,',\n", + " 'status': 'active',\n", + " 'uploader': 2,\n", + " 'version': 1},\n", + " {'NumberOfClasses': 0.0,\n", + " 'NumberOfFeatures': 51.0,\n", + " 'NumberOfInstances': 500.0,\n", + " 'NumberOfInstancesWithMissingValues': 0.0,\n", + " 'NumberOfMissingValues': 0.0,\n", + " 'NumberOfNumericFeatures': 51.0,\n", + " 'NumberOfSymbolicFeatures': 0.0,\n", + " 'Unnamed: 0': 479,\n", + " 'description': '**Author**: \\n**Source**: Unknown - Date unknown \\n**Please cite**: \\n\\nThe Friedman datasets are 80 artificially generated datasets originating from: J.H. Friedman (1999). Stochastic Gradient Boosting\\n\\nThe dataset names are coded as \"fri_colinearintydegree_samplenumber_featurenumber\".\\n\\nFriedman is the one of the most used functions for data generation (Friedman, 1999). Friedman functions include both linear and non-linear relations between input and output, and a normalized noise (e) is added to the output. The Friedman function is as follows: \\n\\ny=10*sin(pi*x1*x2)+20*(x3-0.5)^2=10*X4+5*X5+e\\n\\nIn the original Friedman function, there are 5 features for input. To measure the effects of non-related features, additional features are added to the datasets. These added features are independent from the output. However, to measure the algorithm\\'s robustness to the colinearity, the datasets are generated with 5 different colinearity degrees. The colinearity degrees is the number of features depending on other features. \\n\\nThe generated Friedman dataset\\'s parameters and values are given below: \\nThe number of features: 5 10 25 50 100 (only the first 5 features are related to the output. The rest are completely random)\\nThe number of samples: 100 250 500 1000\\nColinearity degrees: 0 1 2 3 4\\nFor the datasets with colinearity degree 4, the numbers of features are generated as 10, 25, 50 and 100.\\nThe other datasets have 5, 10, 25 and 50 features.\\n\\nAs a result, 80 artificial datasets are generated by (4 different feature number * 4 different sample number * 5 different colinearity degree)\\n\\nThe last attribute in each file is the target.',\n", + " 'did': 626,\n", + " 'features': '0 : [0 - oz1 (numeric)], 1 : [1 - oz2 (numeric)], 2 : [2 - oz3 (numeric)], 3 : [3 - oz4 (numeric)], 4 : [4 - oz5 (numeric)], 5 : [5 - oz6 (numeric)], 6 : [6 - oz7 (numeric)], 7 : [7 - oz8 (numeric)], 8 : [8 - oz9 (numeric)], 9 : [9 - oz10 (numeric)], 10 : [10 - oz11 (numeric)], 11 : [11 - oz12 (numeric)], 12 : [12 - oz13 (numeric)], 13 : [13 - oz14 (numeric)], 14 : [14 - oz15 (numeric)], 15 : [15 - oz16 (numeric)], 16 : [16 - oz17 (numeric)], 17 : [17 - oz18 (numeric)], 18 : [18 - oz19 (numeric)], 19 : [19 - oz20 (numeric)], 20 : [20 - oz21 (numeric)], 21 : [21 - oz22 (numeric)], 22 : [22 - oz23 (numeric)], 23 : [23 - oz24 (numeric)], 24 : [24 - oz25 (numeric)], 25 : [25 - oz26 (numeric)], 26 : [26 - oz27 (numeric)], 27 : [27 - oz28 (numeric)], 28 : [28 - oz29 (numeric)], 29 : [29 - oz30 (numeric)], 30 : [30 - oz31 (numeric)], 31 : [31 - oz32 (numeric)], 32 : [32 - oz33 (numeric)], 33 : [33 - oz34 (numeric)], 34 : [34 - oz35 (numeric)], 35 : [35 - oz36 (numeric)], 36 : [36 - oz37 (numeric)], 37 : [37 - oz38 (numeric)], 38 : [38 - oz39 (numeric)], 39 : [39 - oz40 (numeric)], 40 : [40 - oz41 (numeric)], 41 : [41 - oz42 (numeric)], 42 : [42 - oz43 (numeric)], 43 : [43 - oz44 (numeric)], 44 : [44 - oz45 (numeric)], 45 : [45 - oz46 (numeric)], 46 : [46 - oz47 (numeric)], 47 : [47 - oz48 (numeric)], 48 : [48 - oz49 (numeric)], 49 : [49 - oz50 (numeric)], 50 : [50 - oz51 (numeric)],',\n", + " 'format': 'ARFF',\n", + " 'name': 'fri_c2_500_50',\n", + " 'qualities': 'AutoCorrelation : -0.1268945054000004, CfsSubsetEval_DecisionStumpAUC : nan, CfsSubsetEval_DecisionStumpErrRate : nan, CfsSubsetEval_DecisionStumpKappa : nan, CfsSubsetEval_NaiveBayesAUC : nan, CfsSubsetEval_NaiveBayesErrRate : nan, CfsSubsetEval_NaiveBayesKappa : nan, CfsSubsetEval_kNN1NAUC : nan, CfsSubsetEval_kNN1NErrRate : nan, CfsSubsetEval_kNN1NKappa : nan, ClassEntropy : nan, DecisionStumpAUC : nan, DecisionStumpErrRate : nan, DecisionStumpKappa : nan, Dimensionality : 0.102, EquivalentNumberOfAtts : nan, J48.00001.AUC : nan, J48.00001.ErrRate : nan, J48.00001.Kappa : nan, J48.0001.AUC : nan, J48.0001.ErrRate : nan, J48.0001.Kappa : nan, J48.001.AUC : nan, J48.001.ErrRate : nan, J48.001.Kappa : nan, MajorityClassPercentage : nan, MajorityClassSize : nan, MaxAttributeEntropy : nan, MaxKurtosisOfNumericAtts : 0.40050970922613294, MaxMeansOfNumericAtts : 1.7419999434054034e-09, MaxMutualInformation : nan, MaxNominalAttDistinctValues : nan, MaxSkewnessOfNumericAtts : 0.8257288820876238, MaxStdDevOfNumericAtts : 1.0000000017470212, MeanAttributeEntropy : nan, MeanKurtosisOfNumericAtts : -1.1476523531550133, MeanMeansOfNumericAtts : 1.186981529806204e-10, MeanMutualInformation : nan, MeanNoiseToSignalRatio : nan, MeanNominalAttDistinctValues : nan, MeanSkewnessOfNumericAtts : 0.015138124990254022, MeanStdDevOfNumericAtts : 0.9999999999150228, MinAttributeEntropy : nan, MinKurtosisOfNumericAtts : -1.3001476274593584, MinMeansOfNumericAtts : -1.673260040746527e-09, MinMutualInformation : nan, MinNominalAttDistinctValues : nan, MinSkewnessOfNumericAtts : -0.58640368879216, MinStdDevOfNumericAtts : 0.9999999975311687, MinorityClassPercentage : nan, MinorityClassSize : nan, NaiveBayesAUC : nan, NaiveBayesErrRate : nan, NaiveBayesKappa : nan, NumberOfBinaryFeatures : 0.0, NumberOfClasses : 0.0, NumberOfFeatures : 51.0, NumberOfInstances : 500.0, NumberOfInstancesWithMissingValues : 0.0, NumberOfMissingValues : 0.0, NumberOfNumericFeatures : 51.0, NumberOfSymbolicFeatures : 0.0, PercentageOfBinaryFeatures : 0.0, PercentageOfInstancesWithMissingValues : 0.0, PercentageOfMissingValues : 0.0, PercentageOfNumericFeatures : 100.0, PercentageOfSymbolicFeatures : 0.0, Quartile1AttributeEntropy : nan, Quartile1KurtosisOfNumericAtts : -1.2376778640025938, Quartile1MeansOfNumericAtts : -3.560000560454135e-10, Quartile1MutualInformation : nan, Quartile1SkewnessOfNumericAtts : -0.04717388403461312, Quartile1StdDevOfNumericAtts : 0.9999999993775461, Quartile2AttributeEntropy : nan, Quartile2KurtosisOfNumericAtts : -1.2049058684259772, Quartile2MeansOfNumericAtts : 6.987996861718138e-11, Quartile2MutualInformation : nan, Quartile2SkewnessOfNumericAtts : 0.011985080789238782, Quartile2StdDevOfNumericAtts : 1.0000000000834948, Quartile3AttributeEntropy : nan, Quartile3KurtosisOfNumericAtts : -1.1491180708665234, Quartile3MeansOfNumericAtts : 4.622000169263174e-10, Quartile3MutualInformation : nan, Quartile3SkewnessOfNumericAtts : 0.061014959488354, Quartile3StdDevOfNumericAtts : 1.0000000007198544, REPTreeDepth1AUC : nan, REPTreeDepth1ErrRate : nan, REPTreeDepth1Kappa : nan, REPTreeDepth2AUC : nan, REPTreeDepth2ErrRate : nan, REPTreeDepth2Kappa : nan, REPTreeDepth3AUC : nan, REPTreeDepth3ErrRate : nan, REPTreeDepth3Kappa : nan, RandomTreeDepth1AUC : nan, RandomTreeDepth1ErrRate : nan, RandomTreeDepth1Kappa : nan, RandomTreeDepth2AUC : nan, RandomTreeDepth2ErrRate : nan, RandomTreeDepth2Kappa : nan, RandomTreeDepth3AUC : nan, RandomTreeDepth3ErrRate : nan, RandomTreeDepth3Kappa : nan, StdvNominalAttDistinctValues : nan, kNN1NAUC : nan, kNN1NErrRate : nan, kNN1NKappa : nan,',\n", + " 'status': 'active',\n", + " 'uploader': 2,\n", + " 'version': 1},\n", + " {'NumberOfClasses': 0.0,\n", + " 'NumberOfFeatures': 11.0,\n", + " 'NumberOfInstances': 500.0,\n", + " 'NumberOfInstancesWithMissingValues': 0.0,\n", + " 'NumberOfMissingValues': 0.0,\n", + " 'NumberOfNumericFeatures': 11.0,\n", + " 'NumberOfSymbolicFeatures': 0.0,\n", + " 'Unnamed: 0': 480,\n", + " 'description': '**Author**: \\n**Source**: Unknown - Date unknown \\n**Please cite**: \\n\\nThe Friedman datasets are 80 artificially generated datasets originating from: J.H. Friedman (1999). Stochastic Gradient Boosting\\n\\nThe dataset names are coded as \"fri_colinearintydegree_samplenumber_featurenumber\".\\n\\nFriedman is the one of the most used functions for data generation (Friedman, 1999). Friedman functions include both linear and non-linear relations between input and output, and a normalized noise (e) is added to the output. The Friedman function is as follows: \\n\\ny=10*sin(pi*x1*x2)+20*(x3-0.5)^2=10*X4+5*X5+e\\n\\nIn the original Friedman function, there are 5 features for input. To measure the effects of non-related features, additional features are added to the datasets. These added features are independent from the output. However, to measure the algorithm\\'s robustness to the colinearity, the datasets are generated with 5 different colinearity degrees. The colinearity degrees is the number of features depending on other features. \\n\\nThe generated Friedman dataset\\'s parameters and values are given below: \\nThe number of features: 5 10 25 50 100 (only the first 5 features are related to the output. The rest are completely random)\\nThe number of samples: 100 250 500 1000\\nColinearity degrees: 0 1 2 3 4\\nFor the datasets with colinearity degree 4, the numbers of features are generated as 10, 25, 50 and 100.\\nThe other datasets have 5, 10, 25 and 50 features.\\n\\nAs a result, 80 artificial datasets are generated by (4 different feature number * 4 different sample number * 5 different colinearity degree)\\n\\nThe last attribute in each file is the target.',\n", + " 'did': 627,\n", + " 'features': '0 : [0 - oz1 (numeric)], 1 : [1 - oz2 (numeric)], 2 : [2 - oz3 (numeric)], 3 : [3 - oz4 (numeric)], 4 : [4 - oz5 (numeric)], 5 : [5 - oz6 (numeric)], 6 : [6 - oz7 (numeric)], 7 : [7 - oz8 (numeric)], 8 : [8 - oz9 (numeric)], 9 : [9 - oz10 (numeric)], 10 : [10 - oz11 (numeric)],',\n", + " 'format': 'ARFF',\n", + " 'name': 'fri_c2_500_10',\n", + " 'qualities': 'AutoCorrelation : -0.11550001540681405, CfsSubsetEval_DecisionStumpAUC : nan, CfsSubsetEval_DecisionStumpErrRate : nan, CfsSubsetEval_DecisionStumpKappa : nan, CfsSubsetEval_NaiveBayesAUC : nan, CfsSubsetEval_NaiveBayesErrRate : nan, CfsSubsetEval_NaiveBayesKappa : nan, CfsSubsetEval_kNN1NAUC : nan, CfsSubsetEval_kNN1NErrRate : nan, CfsSubsetEval_kNN1NKappa : nan, ClassEntropy : nan, DecisionStumpAUC : nan, DecisionStumpErrRate : nan, DecisionStumpKappa : nan, Dimensionality : 0.022, EquivalentNumberOfAtts : nan, J48.00001.AUC : nan, J48.00001.ErrRate : nan, J48.00001.Kappa : nan, J48.0001.AUC : nan, J48.0001.ErrRate : nan, J48.0001.Kappa : nan, J48.001.AUC : nan, J48.001.ErrRate : nan, J48.001.Kappa : nan, MajorityClassPercentage : nan, MajorityClassSize : nan, MaxAttributeEntropy : nan, MaxKurtosisOfNumericAtts : 0.38517831285595205, MaxMeansOfNumericAtts : 9.482000321581196e-10, MaxMutualInformation : nan, MaxNominalAttDistinctValues : nan, MaxSkewnessOfNumericAtts : 0.7972004342299485, MaxStdDevOfNumericAtts : 1.0000000018286623, MeanAttributeEntropy : nan, MeanKurtosisOfNumericAtts : -0.9569272304070873, MeanMeansOfNumericAtts : 8.195455355971981e-11, MeanMutualInformation : nan, MeanNoiseToSignalRatio : nan, MeanNominalAttDistinctValues : nan, MeanSkewnessOfNumericAtts : 0.04714906419946629, MeanStdDevOfNumericAtts : 0.9999999996969955, MinAttributeEntropy : nan, MinKurtosisOfNumericAtts : -1.2705783053188873, MinMeansOfNumericAtts : -1.1432000008859687e-09, MinMutualInformation : nan, MinNominalAttDistinctValues : nan, MinSkewnessOfNumericAtts : -0.6005201001977205, MinStdDevOfNumericAtts : 0.9999999981064633, MinorityClassPercentage : nan, MinorityClassSize : nan, NaiveBayesAUC : nan, NaiveBayesErrRate : nan, NaiveBayesKappa : nan, NumberOfBinaryFeatures : 0.0, NumberOfClasses : 0.0, NumberOfFeatures : 11.0, NumberOfInstances : 500.0, NumberOfInstancesWithMissingValues : 0.0, NumberOfMissingValues : 0.0, NumberOfNumericFeatures : 11.0, NumberOfSymbolicFeatures : 0.0, PercentageOfBinaryFeatures : 0.0, PercentageOfInstancesWithMissingValues : 0.0, PercentageOfMissingValues : 0.0, PercentageOfNumericFeatures : 100.0, PercentageOfSymbolicFeatures : 0.0, Quartile1AttributeEntropy : nan, Quartile1KurtosisOfNumericAtts : -1.2332327972724195, Quartile1MeansOfNumericAtts : -4.774000403884316e-10, Quartile1MutualInformation : nan, Quartile1SkewnessOfNumericAtts : -0.034254004941503026, Quartile1StdDevOfNumericAtts : 0.9999999984859435, Quartile2AttributeEntropy : nan, Quartile2KurtosisOfNumericAtts : -1.2099144610876031, Quartile2MeansOfNumericAtts : 9.659995803268374e-11, Quartile2MutualInformation : nan, Quartile2SkewnessOfNumericAtts : 0.06852484125977333, Quartile2StdDevOfNumericAtts : 0.9999999996020292, Quartile3AttributeEntropy : nan, Quartile3KurtosisOfNumericAtts : -0.7211901728348078, Quartile3MeansOfNumericAtts : 8.11800013877928e-10, Quartile3MutualInformation : nan, Quartile3SkewnessOfNumericAtts : 0.08999135279171033, Quartile3StdDevOfNumericAtts : 1.000000001072244, REPTreeDepth1AUC : nan, REPTreeDepth1ErrRate : nan, REPTreeDepth1Kappa : nan, REPTreeDepth2AUC : nan, REPTreeDepth2ErrRate : nan, REPTreeDepth2Kappa : nan, REPTreeDepth3AUC : nan, REPTreeDepth3ErrRate : nan, REPTreeDepth3Kappa : nan, RandomTreeDepth1AUC : nan, RandomTreeDepth1ErrRate : nan, RandomTreeDepth1Kappa : nan, RandomTreeDepth2AUC : nan, RandomTreeDepth2ErrRate : nan, RandomTreeDepth2Kappa : nan, RandomTreeDepth3AUC : nan, RandomTreeDepth3ErrRate : nan, RandomTreeDepth3Kappa : nan, StdvNominalAttDistinctValues : nan, kNN1NAUC : nan, kNN1NErrRate : nan, kNN1NKappa : nan,',\n", + " 'status': 'active',\n", + " 'uploader': 2,\n", + " 'version': 1},\n", + " {'NumberOfClasses': 0.0,\n", + " 'NumberOfFeatures': 6.0,\n", + " 'NumberOfInstances': 1000.0,\n", + " 'NumberOfInstancesWithMissingValues': 0.0,\n", + " 'NumberOfMissingValues': 0.0,\n", + " 'NumberOfNumericFeatures': 6.0,\n", + " 'NumberOfSymbolicFeatures': 0.0,\n", + " 'Unnamed: 0': 481,\n", + " 'description': '**Author**: \\n**Source**: Unknown - Date unknown \\n**Please cite**: \\n\\nThe Friedman datasets are 80 artificially generated datasets originating from: J.H. Friedman (1999). Stochastic Gradient Boosting\\n\\nThe dataset names are coded as \"fri_colinearintydegree_samplenumber_featurenumber\".\\n\\nFriedman is the one of the most used functions for data generation (Friedman, 1999). Friedman functions include both linear and non-linear relations between input and output, and a normalized noise (e) is added to the output. The Friedman function is as follows: \\n\\ny=10*sin(pi*x1*x2)+20*(x3-0.5)^2=10*X4+5*X5+e\\n\\nIn the original Friedman function, there are 5 features for input. To measure the effects of non-related features, additional features are added to the datasets. These added features are independent from the output. However, to measure the algorithm\\'s robustness to the colinearity, the datasets are generated with 5 different colinearity degrees. The colinearity degrees is the number of features depending on other features. \\n\\nThe generated Friedman dataset\\'s parameters and values are given below: \\nThe number of features: 5 10 25 50 100 (only the first 5 features are related to the output. The rest are completely random)\\nThe number of samples: 100 250 500 1000\\nColinearity degrees: 0 1 2 3 4\\nFor the datasets with colinearity degree 4, the numbers of features are generated as 10, 25, 50 and 100.\\nThe other datasets have 5, 10, 25 and 50 features.\\n\\nAs a result, 80 artificial datasets are generated by (4 different feature number * 4 different sample number * 5 different colinearity degree)\\n\\nThe last attribute in each file is the target.',\n", + " 'did': 628,\n", + " 'features': '0 : [0 - oz1 (numeric)], 1 : [1 - oz2 (numeric)], 2 : [2 - oz3 (numeric)], 3 : [3 - oz4 (numeric)], 4 : [4 - oz5 (numeric)], 5 : [5 - oz6 (numeric)],',\n", + " 'format': 'ARFF',\n", + " 'name': 'fri_c3_1000_5',\n", + " 'qualities': 'AutoCorrelation : -0.12870232880826782, CfsSubsetEval_DecisionStumpAUC : nan, CfsSubsetEval_DecisionStumpErrRate : nan, CfsSubsetEval_DecisionStumpKappa : nan, CfsSubsetEval_NaiveBayesAUC : nan, CfsSubsetEval_NaiveBayesErrRate : nan, CfsSubsetEval_NaiveBayesKappa : nan, CfsSubsetEval_kNN1NAUC : nan, CfsSubsetEval_kNN1NErrRate : nan, CfsSubsetEval_kNN1NKappa : nan, ClassEntropy : nan, DecisionStumpAUC : nan, DecisionStumpErrRate : nan, DecisionStumpKappa : nan, Dimensionality : 0.006, EquivalentNumberOfAtts : nan, J48.00001.AUC : nan, J48.00001.ErrRate : nan, J48.00001.Kappa : nan, J48.0001.AUC : nan, J48.0001.ErrRate : nan, J48.0001.Kappa : nan, J48.001.AUC : nan, J48.001.ErrRate : nan, J48.001.Kappa : nan, MajorityClassPercentage : nan, MajorityClassSize : nan, MaxAttributeEntropy : nan, MaxKurtosisOfNumericAtts : 1.1536224985085624, MaxMeansOfNumericAtts : 5.123000166307179e-10, MaxMutualInformation : nan, MaxNominalAttDistinctValues : nan, MaxSkewnessOfNumericAtts : 1.2469418106575223, MaxStdDevOfNumericAtts : 1.000000000656039, MeanAttributeEntropy : nan, MeanKurtosisOfNumericAtts : -0.3339947179631272, MeanMeansOfNumericAtts : -2.5390006153092067e-11, MeanMutualInformation : nan, MeanNoiseToSignalRatio : nan, MeanNominalAttDistinctValues : nan, MeanSkewnessOfNumericAtts : 0.27280678404407294, MeanStdDevOfNumericAtts : 0.9999999998249032, MinAttributeEntropy : nan, MinKurtosisOfNumericAtts : -1.1996531749727037, MinMeansOfNumericAtts : -6.798000173890983e-10, MinMutualInformation : nan, MinNominalAttDistinctValues : nan, MinSkewnessOfNumericAtts : -0.26268514717166463, MinStdDevOfNumericAtts : 0.9999999988017908, MinorityClassPercentage : nan, MinorityClassSize : nan, NaiveBayesAUC : nan, NaiveBayesErrRate : nan, NaiveBayesKappa : nan, NumberOfBinaryFeatures : 0.0, NumberOfClasses : 0.0, NumberOfFeatures : 6.0, NumberOfInstances : 1000.0, NumberOfInstancesWithMissingValues : 0.0, NumberOfMissingValues : 0.0, NumberOfNumericFeatures : 6.0, NumberOfSymbolicFeatures : 0.0, PercentageOfBinaryFeatures : 0.0, PercentageOfInstancesWithMissingValues : 0.0, PercentageOfMissingValues : 0.0, PercentageOfNumericFeatures : 100.0, PercentageOfSymbolicFeatures : 0.0, Quartile1AttributeEntropy : nan, Quartile1KurtosisOfNumericAtts : -1.1757650060563862, Quartile1MeansOfNumericAtts : -5.268525095636357e-10, Quartile1MutualInformation : nan, Quartile1SkewnessOfNumericAtts : -0.10150372560410345, Quartile1StdDevOfNumericAtts : 0.9999999989233246, Quartile2AttributeEntropy : nan, Quartile2KurtosisOfNumericAtts : -0.40648735723980156, Quartile2MeansOfNumericAtts : 8.164990517034463e-12, Quartile2MutualInformation : nan, Quartile2SkewnessOfNumericAtts : 0.003476917189752071, Quartile2StdDevOfNumericAtts : 1.0000000000893352, Quartile3AttributeEntropy : nan, Quartile3KurtosisOfNumericAtts : 0.30553514931408665, Quartile3MeansOfNumericAtts : 4.840999964783599e-10, Quartile3MutualInformation : nan, Quartile3SkewnessOfNumericAtts : 0.8317905462748747, Quartile3StdDevOfNumericAtts : 1.0000000004258218, REPTreeDepth1AUC : nan, REPTreeDepth1ErrRate : nan, REPTreeDepth1Kappa : nan, REPTreeDepth2AUC : nan, REPTreeDepth2ErrRate : nan, REPTreeDepth2Kappa : nan, REPTreeDepth3AUC : nan, REPTreeDepth3ErrRate : nan, REPTreeDepth3Kappa : nan, RandomTreeDepth1AUC : nan, RandomTreeDepth1ErrRate : nan, RandomTreeDepth1Kappa : nan, RandomTreeDepth2AUC : nan, RandomTreeDepth2ErrRate : nan, RandomTreeDepth2Kappa : nan, RandomTreeDepth3AUC : nan, RandomTreeDepth3ErrRate : nan, RandomTreeDepth3Kappa : nan, StdvNominalAttDistinctValues : nan, kNN1NAUC : nan, kNN1NErrRate : nan, kNN1NKappa : nan,',\n", + " 'status': 'active',\n", + " 'uploader': 2,\n", + " 'version': 1},\n", + " {'NumberOfClasses': 0.0,\n", + " 'NumberOfFeatures': 26.0,\n", + " 'NumberOfInstances': 100.0,\n", + " 'NumberOfInstancesWithMissingValues': 0.0,\n", + " 'NumberOfMissingValues': 0.0,\n", + " 'NumberOfNumericFeatures': 26.0,\n", + " 'NumberOfSymbolicFeatures': 0.0,\n", + " 'Unnamed: 0': 482,\n", + " 'description': '**Author**: \\n**Source**: Unknown - Date unknown \\n**Please cite**: \\n\\nThe Friedman datasets are 80 artificially generated datasets originating from: J.H. Friedman (1999). Stochastic Gradient Boosting\\n\\nThe dataset names are coded as \"fri_colinearintydegree_samplenumber_featurenumber\".\\n\\nFriedman is the one of the most used functions for data generation (Friedman, 1999). Friedman functions include both linear and non-linear relations between input and output, and a normalized noise (e) is added to the output. The Friedman function is as follows: \\n\\ny=10*sin(pi*x1*x2)+20*(x3-0.5)^2=10*X4+5*X5+e\\n\\nIn the original Friedman function, there are 5 features for input. To measure the effects of non-related features, additional features are added to the datasets. These added features are independent from the output. However, to measure the algorithm\\'s robustness to the colinearity, the datasets are generated with 5 different colinearity degrees. The colinearity degrees is the number of features depending on other features. \\n\\nThe generated Friedman dataset\\'s parameters and values are given below: \\nThe number of features: 5 10 25 50 100 (only the first 5 features are related to the output. The rest are completely random)\\nThe number of samples: 100 250 500 1000\\nColinearity degrees: 0 1 2 3 4\\nFor the datasets with colinearity degree 4, the numbers of features are generated as 10, 25, 50 and 100.\\nThe other datasets have 5, 10, 25 and 50 features.\\n\\nAs a result, 80 artificial datasets are generated by (4 different feature number * 4 different sample number * 5 different colinearity degree)\\n\\nThe last attribute in each file is the target.',\n", + " 'did': 629,\n", + " 'features': '0 : [0 - oz1 (numeric)], 1 : [1 - oz2 (numeric)], 2 : [2 - oz3 (numeric)], 3 : [3 - oz4 (numeric)], 4 : [4 - oz5 (numeric)], 5 : [5 - oz6 (numeric)], 6 : [6 - oz7 (numeric)], 7 : [7 - oz8 (numeric)], 8 : [8 - oz9 (numeric)], 9 : [9 - oz10 (numeric)], 10 : [10 - oz11 (numeric)], 11 : [11 - oz12 (numeric)], 12 : [12 - oz13 (numeric)], 13 : [13 - oz14 (numeric)], 14 : [14 - oz15 (numeric)], 15 : [15 - oz16 (numeric)], 16 : [16 - oz17 (numeric)], 17 : [17 - oz18 (numeric)], 18 : [18 - oz19 (numeric)], 19 : [19 - oz20 (numeric)], 20 : [20 - oz21 (numeric)], 21 : [21 - oz22 (numeric)], 22 : [22 - oz23 (numeric)], 23 : [23 - oz24 (numeric)], 24 : [24 - oz25 (numeric)], 25 : [25 - oz26 (numeric)],',\n", + " 'format': 'ARFF',\n", + " 'name': 'fri_c1_100_25',\n", + " 'qualities': 'AutoCorrelation : -0.09779439888888883, CfsSubsetEval_DecisionStumpAUC : nan, CfsSubsetEval_DecisionStumpErrRate : nan, CfsSubsetEval_DecisionStumpKappa : nan, CfsSubsetEval_NaiveBayesAUC : nan, CfsSubsetEval_NaiveBayesErrRate : nan, CfsSubsetEval_NaiveBayesKappa : nan, CfsSubsetEval_kNN1NAUC : nan, CfsSubsetEval_kNN1NErrRate : nan, CfsSubsetEval_kNN1NKappa : nan, ClassEntropy : nan, DecisionStumpAUC : nan, DecisionStumpErrRate : nan, DecisionStumpKappa : nan, Dimensionality : 0.26, EquivalentNumberOfAtts : nan, J48.00001.AUC : nan, J48.00001.ErrRate : nan, J48.00001.Kappa : nan, J48.0001.AUC : nan, J48.0001.ErrRate : nan, J48.0001.Kappa : nan, J48.001.AUC : nan, J48.001.ErrRate : nan, J48.001.Kappa : nan, MajorityClassPercentage : nan, MajorityClassSize : nan, MaxAttributeEntropy : nan, MaxKurtosisOfNumericAtts : -0.8167471085084324, MaxMeansOfNumericAtts : 2.9259999747427435e-09, MaxMutualInformation : nan, MaxNominalAttDistinctValues : nan, MaxSkewnessOfNumericAtts : 0.3253491953587132, MaxStdDevOfNumericAtts : 1.0000000033301368, MeanAttributeEntropy : nan, MeanKurtosisOfNumericAtts : -1.1067805258625716, MeanMeansOfNumericAtts : -1.5473070181696822e-10, MeanMutualInformation : nan, MeanNoiseToSignalRatio : nan, MeanNominalAttDistinctValues : nan, MeanSkewnessOfNumericAtts : -0.052514005600623095, MeanStdDevOfNumericAtts : 0.9999999992124979, MinAttributeEntropy : nan, MinKurtosisOfNumericAtts : -1.2581761797595237, MinMeansOfNumericAtts : -4.970000024862032e-09, MinMutualInformation : nan, MinNominalAttDistinctValues : nan, MinSkewnessOfNumericAtts : -0.3603839801343778, MinStdDevOfNumericAtts : 0.9999999960025994, MinorityClassPercentage : nan, MinorityClassSize : nan, NaiveBayesAUC : nan, NaiveBayesErrRate : nan, NaiveBayesKappa : nan, NumberOfBinaryFeatures : 0.0, NumberOfClasses : 0.0, NumberOfFeatures : 26.0, NumberOfInstances : 100.0, NumberOfInstancesWithMissingValues : 0.0, NumberOfMissingValues : 0.0, NumberOfNumericFeatures : 26.0, NumberOfSymbolicFeatures : 0.0, PercentageOfBinaryFeatures : 0.0, PercentageOfInstancesWithMissingValues : 0.0, PercentageOfMissingValues : 0.0, PercentageOfNumericFeatures : 100.0, PercentageOfSymbolicFeatures : 0.0, Quartile1AttributeEntropy : nan, Quartile1KurtosisOfNumericAtts : -1.1897722669281043, Quartile1MeansOfNumericAtts : -1.3124999958050166e-09, Quartile1MutualInformation : nan, Quartile1SkewnessOfNumericAtts : -0.17995230709375323, Quartile1StdDevOfNumericAtts : 0.9999999977387246, Quartile2AttributeEntropy : nan, Quartile2KurtosisOfNumericAtts : -1.1290672971179352, Quartile2MeansOfNumericAtts : 9.449996674071315e-11, Quartile2MutualInformation : nan, Quartile2SkewnessOfNumericAtts : -0.037178364724484256, Quartile2StdDevOfNumericAtts : 0.9999999989666395, Quartile3AttributeEntropy : nan, Quartile3KurtosisOfNumericAtts : -1.0502994145771845, Quartile3MeansOfNumericAtts : 1.5297500149946686e-09, Quartile3MutualInformation : nan, Quartile3SkewnessOfNumericAtts : 0.04009025014922827, Quartile3StdDevOfNumericAtts : 1.0000000010242138, REPTreeDepth1AUC : nan, REPTreeDepth1ErrRate : nan, REPTreeDepth1Kappa : nan, REPTreeDepth2AUC : nan, REPTreeDepth2ErrRate : nan, REPTreeDepth2Kappa : nan, REPTreeDepth3AUC : nan, REPTreeDepth3ErrRate : nan, REPTreeDepth3Kappa : nan, RandomTreeDepth1AUC : nan, RandomTreeDepth1ErrRate : nan, RandomTreeDepth1Kappa : nan, RandomTreeDepth2AUC : nan, RandomTreeDepth2ErrRate : nan, RandomTreeDepth2Kappa : nan, RandomTreeDepth3AUC : nan, RandomTreeDepth3ErrRate : nan, RandomTreeDepth3Kappa : nan, StdvNominalAttDistinctValues : nan, kNN1NAUC : nan, kNN1NErrRate : nan, kNN1NKappa : nan,',\n", + " 'status': 'active',\n", + " 'uploader': 2,\n", + " 'version': 1},\n", + " {'NumberOfClasses': 0.0,\n", + " 'NumberOfFeatures': 51.0,\n", + " 'NumberOfInstances': 250.0,\n", + " 'NumberOfInstancesWithMissingValues': 0.0,\n", + " 'NumberOfMissingValues': 0.0,\n", + " 'NumberOfNumericFeatures': 51.0,\n", + " 'NumberOfSymbolicFeatures': 0.0,\n", + " 'Unnamed: 0': 485,\n", + " 'description': '**Author**: \\n**Source**: Unknown - Date unknown \\n**Please cite**: \\n\\nThe Friedman datasets are 80 artificially generated datasets originating from: J.H. Friedman (1999). Stochastic Gradient Boosting\\n\\nThe dataset names are coded as \"fri_colinearintydegree_samplenumber_featurenumber\".\\n\\nFriedman is the one of the most used functions for data generation (Friedman, 1999). Friedman functions include both linear and non-linear relations between input and output, and a normalized noise (e) is added to the output. The Friedman function is as follows: \\n\\ny=10*sin(pi*x1*x2)+20*(x3-0.5)^2=10*X4+5*X5+e\\n\\nIn the original Friedman function, there are 5 features for input. To measure the effects of non-related features, additional features are added to the datasets. These added features are independent from the output. However, to measure the algorithm\\'s robustness to the colinearity, the datasets are generated with 5 different colinearity degrees. The colinearity degrees is the number of features depending on other features. \\n\\nThe generated Friedman dataset\\'s parameters and values are given below: \\nThe number of features: 5 10 25 50 100 (only the first 5 features are related to the output. The rest are completely random)\\nThe number of samples: 100 250 500 1000\\nColinearity degrees: 0 1 2 3 4\\nFor the datasets with colinearity degree 4, the numbers of features are generated as 10, 25, 50 and 100.\\nThe other datasets have 5, 10, 25 and 50 features.\\n\\nAs a result, 80 artificial datasets are generated by (4 different feature number * 4 different sample number * 5 different colinearity degree)\\n\\nThe last attribute in each file is the target.',\n", + " 'did': 632,\n", + " 'features': '0 : [0 - oz1 (numeric)], 1 : [1 - oz2 (numeric)], 2 : [2 - oz3 (numeric)], 3 : [3 - oz4 (numeric)], 4 : [4 - oz5 (numeric)], 5 : [5 - oz6 (numeric)], 6 : [6 - oz7 (numeric)], 7 : [7 - oz8 (numeric)], 8 : [8 - oz9 (numeric)], 9 : [9 - oz10 (numeric)], 10 : [10 - oz11 (numeric)], 11 : [11 - oz12 (numeric)], 12 : [12 - oz13 (numeric)], 13 : [13 - oz14 (numeric)], 14 : [14 - oz15 (numeric)], 15 : [15 - oz16 (numeric)], 16 : [16 - oz17 (numeric)], 17 : [17 - oz18 (numeric)], 18 : [18 - oz19 (numeric)], 19 : [19 - oz20 (numeric)], 20 : [20 - oz21 (numeric)], 21 : [21 - oz22 (numeric)], 22 : [22 - oz23 (numeric)], 23 : [23 - oz24 (numeric)], 24 : [24 - oz25 (numeric)], 25 : [25 - oz26 (numeric)], 26 : [26 - oz27 (numeric)], 27 : [27 - oz28 (numeric)], 28 : [28 - oz29 (numeric)], 29 : [29 - oz30 (numeric)], 30 : [30 - oz31 (numeric)], 31 : [31 - oz32 (numeric)], 32 : [32 - oz33 (numeric)], 33 : [33 - oz34 (numeric)], 34 : [34 - oz35 (numeric)], 35 : [35 - oz36 (numeric)], 36 : [36 - oz37 (numeric)], 37 : [37 - oz38 (numeric)], 38 : [38 - oz39 (numeric)], 39 : [39 - oz40 (numeric)], 40 : [40 - oz41 (numeric)], 41 : [41 - oz42 (numeric)], 42 : [42 - oz43 (numeric)], 43 : [43 - oz44 (numeric)], 44 : [44 - oz45 (numeric)], 45 : [45 - oz46 (numeric)], 46 : [46 - oz47 (numeric)], 47 : [47 - oz48 (numeric)], 48 : [48 - oz49 (numeric)], 49 : [49 - oz50 (numeric)], 50 : [50 - oz51 (numeric)],',\n", + " 'format': 'ARFF',\n", + " 'name': 'fri_c3_250_50',\n", + " 'qualities': 'AutoCorrelation : -0.06448036682730894, CfsSubsetEval_DecisionStumpAUC : nan, CfsSubsetEval_DecisionStumpErrRate : nan, CfsSubsetEval_DecisionStumpKappa : nan, CfsSubsetEval_NaiveBayesAUC : nan, CfsSubsetEval_NaiveBayesErrRate : nan, CfsSubsetEval_NaiveBayesKappa : nan, CfsSubsetEval_kNN1NAUC : nan, CfsSubsetEval_kNN1NErrRate : nan, CfsSubsetEval_kNN1NKappa : nan, ClassEntropy : nan, DecisionStumpAUC : nan, DecisionStumpErrRate : nan, DecisionStumpKappa : nan, Dimensionality : 0.204, EquivalentNumberOfAtts : nan, J48.00001.AUC : nan, J48.00001.ErrRate : nan, J48.00001.Kappa : nan, J48.0001.AUC : nan, J48.0001.ErrRate : nan, J48.0001.Kappa : nan, J48.001.AUC : nan, J48.001.ErrRate : nan, J48.001.Kappa : nan, MajorityClassPercentage : nan, MajorityClassSize : nan, MaxAttributeEntropy : nan, MaxKurtosisOfNumericAtts : 1.7495107166524035, MaxMeansOfNumericAtts : 2.5303999468678784e-09, MaxMutualInformation : nan, MaxNominalAttDistinctValues : nan, MaxSkewnessOfNumericAtts : 1.3694217971674387, MaxStdDevOfNumericAtts : 1.0000000035578938, MeanAttributeEntropy : nan, MeanKurtosisOfNumericAtts : -1.0687767190342374, MeanMeansOfNumericAtts : 2.1276815781402482e-10, MeanMutualInformation : nan, MeanNoiseToSignalRatio : nan, MeanNominalAttDistinctValues : nan, MeanSkewnessOfNumericAtts : 0.06788173473054655, MeanStdDevOfNumericAtts : 0.9999999997904486, MinAttributeEntropy : nan, MinKurtosisOfNumericAtts : -1.3143243017055757, MinMeansOfNumericAtts : -2.592000004852935e-09, MinMutualInformation : nan, MinNominalAttDistinctValues : nan, MinSkewnessOfNumericAtts : -0.1210807693154695, MinStdDevOfNumericAtts : 0.999999996033757, MinorityClassPercentage : nan, MinorityClassSize : nan, NaiveBayesAUC : nan, NaiveBayesErrRate : nan, NaiveBayesKappa : nan, NumberOfBinaryFeatures : 0.0, NumberOfClasses : 0.0, NumberOfFeatures : 51.0, NumberOfInstances : 250.0, NumberOfInstancesWithMissingValues : 0.0, NumberOfMissingValues : 0.0, NumberOfNumericFeatures : 51.0, NumberOfSymbolicFeatures : 0.0, PercentageOfBinaryFeatures : 0.0, PercentageOfInstancesWithMissingValues : 0.0, PercentageOfMissingValues : 0.0, PercentageOfNumericFeatures : 100.0, PercentageOfSymbolicFeatures : 0.0, Quartile1AttributeEntropy : nan, Quartile1KurtosisOfNumericAtts : -1.2388496054751004, Quartile1MeansOfNumericAtts : -1.991999845074588e-10, Quartile1MutualInformation : nan, Quartile1SkewnessOfNumericAtts : -0.028895106513236752, Quartile1StdDevOfNumericAtts : 0.9999999987411002, Quartile2AttributeEntropy : nan, Quartile2KurtosisOfNumericAtts : -1.1932137303225214, Quartile2MeansOfNumericAtts : 3.696000145936296e-10, Quartile2MutualInformation : nan, Quartile2SkewnessOfNumericAtts : 0.021333455299926984, Quartile2StdDevOfNumericAtts : 0.9999999997743819, Quartile3AttributeEntropy : nan, Quartile3KurtosisOfNumericAtts : -1.1469225547639952, Quartile3MeansOfNumericAtts : 8.612000073071612e-10, Quartile3MutualInformation : nan, Quartile3SkewnessOfNumericAtts : 0.08315248174950719, Quartile3StdDevOfNumericAtts : 1.0000000010301344, REPTreeDepth1AUC : nan, REPTreeDepth1ErrRate : nan, REPTreeDepth1Kappa : nan, REPTreeDepth2AUC : nan, REPTreeDepth2ErrRate : nan, REPTreeDepth2Kappa : nan, REPTreeDepth3AUC : nan, REPTreeDepth3ErrRate : nan, REPTreeDepth3Kappa : nan, RandomTreeDepth1AUC : nan, RandomTreeDepth1ErrRate : nan, RandomTreeDepth1Kappa : nan, RandomTreeDepth2AUC : nan, RandomTreeDepth2ErrRate : nan, RandomTreeDepth2Kappa : nan, RandomTreeDepth3AUC : nan, RandomTreeDepth3ErrRate : nan, RandomTreeDepth3Kappa : nan, StdvNominalAttDistinctValues : nan, kNN1NAUC : nan, kNN1NErrRate : nan, kNN1NKappa : nan,',\n", + " 'status': 'active',\n", + " 'uploader': 2,\n", + " 'version': 1},\n", + " {'NumberOfClasses': 0.0,\n", + " 'NumberOfFeatures': 26.0,\n", + " 'NumberOfInstances': 500.0,\n", + " 'NumberOfInstancesWithMissingValues': 0.0,\n", + " 'NumberOfMissingValues': 0.0,\n", + " 'NumberOfNumericFeatures': 26.0,\n", + " 'NumberOfSymbolicFeatures': 0.0,\n", + " 'Unnamed: 0': 486,\n", + " 'description': '**Author**: \\n**Source**: Unknown - Date unknown \\n**Please cite**: \\n\\nThe Friedman datasets are 80 artificially generated datasets originating from: J.H. Friedman (1999). Stochastic Gradient Boosting\\n\\nThe dataset names are coded as \"fri_colinearintydegree_samplenumber_featurenumber\".\\n\\nFriedman is the one of the most used functions for data generation (Friedman, 1999). Friedman functions include both linear and non-linear relations between input and output, and a normalized noise (e) is added to the output. The Friedman function is as follows: \\n\\ny=10*sin(pi*x1*x2)+20*(x3-0.5)^2=10*X4+5*X5+e\\n\\nIn the original Friedman function, there are 5 features for input. To measure the effects of non-related features, additional features are added to the datasets. These added features are independent from the output. However, to measure the algorithm\\'s robustness to the colinearity, the datasets are generated with 5 different colinearity degrees. The colinearity degrees is the number of features depending on other features. \\n\\nThe generated Friedman dataset\\'s parameters and values are given below: \\nThe number of features: 5 10 25 50 100 (only the first 5 features are related to the output. The rest are completely random)\\nThe number of samples: 100 250 500 1000\\nColinearity degrees: 0 1 2 3 4\\nFor the datasets with colinearity degree 4, the numbers of features are generated as 10, 25, 50 and 100.\\nThe other datasets have 5, 10, 25 and 50 features.\\n\\nAs a result, 80 artificial datasets are generated by (4 different feature number * 4 different sample number * 5 different colinearity degree)\\n\\nThe last attribute in each file is the target.',\n", + " 'did': 633,\n", + " 'features': '0 : [0 - oz1 (numeric)], 1 : [1 - oz2 (numeric)], 2 : [2 - oz3 (numeric)], 3 : [3 - oz4 (numeric)], 4 : [4 - oz5 (numeric)], 5 : [5 - oz6 (numeric)], 6 : [6 - oz7 (numeric)], 7 : [7 - oz8 (numeric)], 8 : [8 - oz9 (numeric)], 9 : [9 - oz10 (numeric)], 10 : [10 - oz11 (numeric)], 11 : [11 - oz12 (numeric)], 12 : [12 - oz13 (numeric)], 13 : [13 - oz14 (numeric)], 14 : [14 - oz15 (numeric)], 15 : [15 - oz16 (numeric)], 16 : [16 - oz17 (numeric)], 17 : [17 - oz18 (numeric)], 18 : [18 - oz19 (numeric)], 19 : [19 - oz20 (numeric)], 20 : [20 - oz21 (numeric)], 21 : [21 - oz22 (numeric)], 22 : [22 - oz23 (numeric)], 23 : [23 - oz24 (numeric)], 24 : [24 - oz25 (numeric)], 25 : [25 - oz26 (numeric)],',\n", + " 'format': 'ARFF',\n", + " 'name': 'fri_c0_500_25',\n", + " 'qualities': 'AutoCorrelation : -0.13036079975631423, CfsSubsetEval_DecisionStumpAUC : nan, CfsSubsetEval_DecisionStumpErrRate : nan, CfsSubsetEval_DecisionStumpKappa : nan, CfsSubsetEval_NaiveBayesAUC : nan, CfsSubsetEval_NaiveBayesErrRate : nan, CfsSubsetEval_NaiveBayesKappa : nan, CfsSubsetEval_kNN1NAUC : nan, CfsSubsetEval_kNN1NErrRate : nan, CfsSubsetEval_kNN1NKappa : nan, ClassEntropy : nan, DecisionStumpAUC : nan, DecisionStumpErrRate : nan, DecisionStumpKappa : nan, Dimensionality : 0.052, EquivalentNumberOfAtts : nan, J48.00001.AUC : nan, J48.00001.ErrRate : nan, J48.00001.Kappa : nan, J48.0001.AUC : nan, J48.0001.ErrRate : nan, J48.0001.Kappa : nan, J48.001.AUC : nan, J48.001.ErrRate : nan, J48.001.Kappa : nan, MajorityClassPercentage : nan, MajorityClassSize : nan, MaxAttributeEntropy : nan, MaxKurtosisOfNumericAtts : -0.4449234723732558, MaxMeansOfNumericAtts : 1.1983999832487767e-09, MaxMutualInformation : nan, MaxNominalAttDistinctValues : nan, MaxSkewnessOfNumericAtts : 0.14645379310596413, MaxStdDevOfNumericAtts : 1.0000000018816029, MeanAttributeEntropy : nan, MeanKurtosisOfNumericAtts : -1.152024648303206, MeanMeansOfNumericAtts : -2.599976863291677e-10, MeanMutualInformation : nan, MeanNoiseToSignalRatio : nan, MeanNominalAttDistinctValues : nan, MeanSkewnessOfNumericAtts : 0.01954441443739426, MeanStdDevOfNumericAtts : 0.9999999999991531, MinAttributeEntropy : nan, MinKurtosisOfNumericAtts : -1.3039901029862297, MinMeansOfNumericAtts : -1.2978600221202895e-09, MinMutualInformation : nan, MinNominalAttDistinctValues : nan, MinSkewnessOfNumericAtts : -0.14502350732769076, MinStdDevOfNumericAtts : 0.9999999966562448, MinorityClassPercentage : nan, MinorityClassSize : nan, NaiveBayesAUC : nan, NaiveBayesErrRate : nan, NaiveBayesKappa : nan, NumberOfBinaryFeatures : 0.0, NumberOfClasses : 0.0, NumberOfFeatures : 26.0, NumberOfInstances : 500.0, NumberOfInstancesWithMissingValues : 0.0, NumberOfMissingValues : 0.0, NumberOfNumericFeatures : 26.0, NumberOfSymbolicFeatures : 0.0, PercentageOfBinaryFeatures : 0.0, PercentageOfInstancesWithMissingValues : 0.0, PercentageOfMissingValues : 0.0, PercentageOfNumericFeatures : 100.0, PercentageOfSymbolicFeatures : 0.0, Quartile1AttributeEntropy : nan, Quartile1KurtosisOfNumericAtts : -1.2168936270698305, Quartile1MeansOfNumericAtts : -7.774499892199138e-10, Quartile1MutualInformation : nan, Quartile1SkewnessOfNumericAtts : -0.020749712473178403, Quartile1StdDevOfNumericAtts : 0.9999999994199817, Quartile2AttributeEntropy : nan, Quartile2KurtosisOfNumericAtts : -1.1817661435374496, Quartile2MeansOfNumericAtts : -3.609899614787082e-10, Quartile2MutualInformation : nan, Quartile2SkewnessOfNumericAtts : 0.01693542242465498, Quartile2StdDevOfNumericAtts : 1.0000000001767857, Quartile3AttributeEntropy : nan, Quartile3KurtosisOfNumericAtts : -1.1465827310167538, Quartile3MeansOfNumericAtts : 9.285000837633285e-11, Quartile3MutualInformation : nan, Quartile3SkewnessOfNumericAtts : 0.06858896650376467, Quartile3StdDevOfNumericAtts : 1.0000000006976897, REPTreeDepth1AUC : nan, REPTreeDepth1ErrRate : nan, REPTreeDepth1Kappa : nan, REPTreeDepth2AUC : nan, REPTreeDepth2ErrRate : nan, REPTreeDepth2Kappa : nan, REPTreeDepth3AUC : nan, REPTreeDepth3ErrRate : nan, REPTreeDepth3Kappa : nan, RandomTreeDepth1AUC : nan, RandomTreeDepth1ErrRate : nan, RandomTreeDepth1Kappa : nan, RandomTreeDepth2AUC : nan, RandomTreeDepth2ErrRate : nan, RandomTreeDepth2Kappa : nan, RandomTreeDepth3AUC : nan, RandomTreeDepth3ErrRate : nan, RandomTreeDepth3Kappa : nan, StdvNominalAttDistinctValues : nan, kNN1NAUC : nan, kNN1NErrRate : nan, kNN1NKappa : nan,',\n", + " 'status': 'active',\n", + " 'uploader': 2,\n", + " 'version': 1},\n", + " {'NumberOfClasses': 0.0,\n", + " 'NumberOfFeatures': 11.0,\n", + " 'NumberOfInstances': 100.0,\n", + " 'NumberOfInstancesWithMissingValues': 0.0,\n", + " 'NumberOfMissingValues': 0.0,\n", + " 'NumberOfNumericFeatures': 11.0,\n", + " 'NumberOfSymbolicFeatures': 0.0,\n", + " 'Unnamed: 0': 487,\n", + " 'description': '**Author**: \\n**Source**: Unknown - Date unknown \\n**Please cite**: \\n\\nThe Friedman datasets are 80 artificially generated datasets originating from: J.H. Friedman (1999). Stochastic Gradient Boosting\\n\\nThe dataset names are coded as \"fri_colinearintydegree_samplenumber_featurenumber\".\\n\\nFriedman is the one of the most used functions for data generation (Friedman, 1999). Friedman functions include both linear and non-linear relations between input and output, and a normalized noise (e) is added to the output. The Friedman function is as follows: \\n\\ny=10*sin(pi*x1*x2)+20*(x3-0.5)^2=10*X4+5*X5+e\\n\\nIn the original Friedman function, there are 5 features for input. To measure the effects of non-related features, additional features are added to the datasets. These added features are independent from the output. However, to measure the algorithm\\'s robustness to the colinearity, the datasets are generated with 5 different colinearity degrees. The colinearity degrees is the number of features depending on other features. \\n\\nThe generated Friedman dataset\\'s parameters and values are given below: \\nThe number of features: 5 10 25 50 100 (only the first 5 features are related to the output. The rest are completely random)\\nThe number of samples: 100 250 500 1000\\nColinearity degrees: 0 1 2 3 4\\nFor the datasets with colinearity degree 4, the numbers of features are generated as 10, 25, 50 and 100.\\nThe other datasets have 5, 10, 25 and 50 features.\\n\\nAs a result, 80 artificial datasets are generated by (4 different feature number * 4 different sample number * 5 different colinearity degree)\\n\\nThe last attribute in each file is the target.',\n", + " 'did': 634,\n", + " 'features': '0 : [0 - oz1 (numeric)], 1 : [1 - oz2 (numeric)], 2 : [2 - oz3 (numeric)], 3 : [3 - oz4 (numeric)], 4 : [4 - oz5 (numeric)], 5 : [5 - oz6 (numeric)], 6 : [6 - oz7 (numeric)], 7 : [7 - oz8 (numeric)], 8 : [8 - oz9 (numeric)], 9 : [9 - oz10 (numeric)], 10 : [10 - oz11 (numeric)],',\n", + " 'format': 'ARFF',\n", + " 'name': 'fri_c2_100_10',\n", + " 'qualities': 'AutoCorrelation : -0.05580917017171747, CfsSubsetEval_DecisionStumpAUC : nan, CfsSubsetEval_DecisionStumpErrRate : nan, CfsSubsetEval_DecisionStumpKappa : nan, CfsSubsetEval_NaiveBayesAUC : nan, CfsSubsetEval_NaiveBayesErrRate : nan, CfsSubsetEval_NaiveBayesKappa : nan, CfsSubsetEval_kNN1NAUC : nan, CfsSubsetEval_kNN1NErrRate : nan, CfsSubsetEval_kNN1NKappa : nan, ClassEntropy : nan, DecisionStumpAUC : nan, DecisionStumpErrRate : nan, DecisionStumpKappa : nan, Dimensionality : 0.11, EquivalentNumberOfAtts : nan, J48.00001.AUC : nan, J48.00001.ErrRate : nan, J48.00001.Kappa : nan, J48.0001.AUC : nan, J48.0001.ErrRate : nan, J48.0001.Kappa : nan, J48.001.AUC : nan, J48.001.ErrRate : nan, J48.001.Kappa : nan, MajorityClassPercentage : nan, MajorityClassSize : nan, MaxAttributeEntropy : nan, MaxKurtosisOfNumericAtts : 0.8250263918664871, MaxMeansOfNumericAtts : 4.720000021385396e-09, MaxMutualInformation : nan, MaxNominalAttDistinctValues : nan, MaxSkewnessOfNumericAtts : 0.901630521606414, MaxStdDevOfNumericAtts : 1.0000000025817553, MeanAttributeEntropy : nan, MeanKurtosisOfNumericAtts : -0.8873727255853247, MeanMeansOfNumericAtts : 1.0969327334343534e-09, MeanMutualInformation : nan, MeanNoiseToSignalRatio : nan, MeanNominalAttDistinctValues : nan, MeanSkewnessOfNumericAtts : 0.11144929055475318, MeanStdDevOfNumericAtts : 0.9999999998049834, MinAttributeEntropy : nan, MinKurtosisOfNumericAtts : -1.3569116235387846, MinMeansOfNumericAtts : -1.8039999971630749e-09, MinMutualInformation : nan, MinNominalAttDistinctValues : nan, MinSkewnessOfNumericAtts : -0.46226756651637757, MinStdDevOfNumericAtts : 0.9999999964959101, MinorityClassPercentage : nan, MinorityClassSize : nan, NaiveBayesAUC : nan, NaiveBayesErrRate : nan, NaiveBayesKappa : nan, NumberOfBinaryFeatures : 0.0, NumberOfClasses : 0.0, NumberOfFeatures : 11.0, NumberOfInstances : 100.0, NumberOfInstancesWithMissingValues : 0.0, NumberOfMissingValues : 0.0, NumberOfNumericFeatures : 11.0, NumberOfSymbolicFeatures : 0.0, PercentageOfBinaryFeatures : 0.0, PercentageOfInstancesWithMissingValues : 0.0, PercentageOfMissingValues : 0.0, PercentageOfNumericFeatures : 100.0, PercentageOfSymbolicFeatures : 0.0, Quartile1AttributeEntropy : nan, Quartile1KurtosisOfNumericAtts : -1.2957269629589532, Quartile1MeansOfNumericAtts : -1.4674000903269757e-10, Quartile1MutualInformation : nan, Quartile1SkewnessOfNumericAtts : -0.07348100988853508, Quartile1StdDevOfNumericAtts : 0.9999999978517549, Quartile2AttributeEntropy : nan, Quartile2KurtosisOfNumericAtts : -1.124085302153607, Quartile2MeansOfNumericAtts : 8.399999806840696e-10, Quartile2MutualInformation : nan, Quartile2SkewnessOfNumericAtts : 0.09329535632550585, Quartile2StdDevOfNumericAtts : 0.9999999993175158, Quartile3AttributeEntropy : nan, Quartile3KurtosisOfNumericAtts : -0.4607984092143367, Quartile3MeansOfNumericAtts : 2.44999999399198e-09, Quartile3MutualInformation : nan, Quartile3SkewnessOfNumericAtts : 0.2637436042765021, Quartile3StdDevOfNumericAtts : 1.0000000022712177, REPTreeDepth1AUC : nan, REPTreeDepth1ErrRate : nan, REPTreeDepth1Kappa : nan, REPTreeDepth2AUC : nan, REPTreeDepth2ErrRate : nan, REPTreeDepth2Kappa : nan, REPTreeDepth3AUC : nan, REPTreeDepth3ErrRate : nan, REPTreeDepth3Kappa : nan, RandomTreeDepth1AUC : nan, RandomTreeDepth1ErrRate : nan, RandomTreeDepth1Kappa : nan, RandomTreeDepth2AUC : nan, RandomTreeDepth2ErrRate : nan, RandomTreeDepth2Kappa : nan, RandomTreeDepth3AUC : nan, RandomTreeDepth3ErrRate : nan, RandomTreeDepth3Kappa : nan, StdvNominalAttDistinctValues : nan, kNN1NAUC : nan, kNN1NErrRate : nan, kNN1NKappa : nan,',\n", + " 'status': 'active',\n", + " 'uploader': 2,\n", + " 'version': 1},\n", + " {'NumberOfClasses': 0.0,\n", + " 'NumberOfFeatures': 11.0,\n", + " 'NumberOfInstances': 250.0,\n", + " 'NumberOfInstancesWithMissingValues': 0.0,\n", + " 'NumberOfMissingValues': 0.0,\n", + " 'NumberOfNumericFeatures': 11.0,\n", + " 'NumberOfSymbolicFeatures': 0.0,\n", + " 'Unnamed: 0': 488,\n", + " 'description': '**Author**: \\n**Source**: Unknown - Date unknown \\n**Please cite**: \\n\\nThe Friedman datasets are 80 artificially generated datasets originating from: J.H. Friedman (1999). Stochastic Gradient Boosting\\n\\nThe dataset names are coded as \"fri_colinearintydegree_samplenumber_featurenumber\".\\n\\nFriedman is the one of the most used functions for data generation (Friedman, 1999). Friedman functions include both linear and non-linear relations between input and output, and a normalized noise (e) is added to the output. The Friedman function is as follows: \\n\\ny=10*sin(pi*x1*x2)+20*(x3-0.5)^2=10*X4+5*X5+e\\n\\nIn the original Friedman function, there are 5 features for input. To measure the effects of non-related features, additional features are added to the datasets. These added features are independent from the output. However, to measure the algorithm\\'s robustness to the colinearity, the datasets are generated with 5 different colinearity degrees. The colinearity degrees is the number of features depending on other features. \\n\\nThe generated Friedman dataset\\'s parameters and values are given below: \\nThe number of features: 5 10 25 50 100 (only the first 5 features are related to the output. The rest are completely random)\\nThe number of samples: 100 250 500 1000\\nColinearity degrees: 0 1 2 3 4\\nFor the datasets with colinearity degree 4, the numbers of features are generated as 10, 25, 50 and 100.\\nThe other datasets have 5, 10, 25 and 50 features.\\n\\nAs a result, 80 artificial datasets are generated by (4 different feature number * 4 different sample number * 5 different colinearity degree)\\n\\nThe last attribute in each file is the target.',\n", + " 'did': 635,\n", + " 'features': '0 : [0 - oz1 (numeric)], 1 : [1 - oz2 (numeric)], 2 : [2 - oz3 (numeric)], 3 : [3 - oz4 (numeric)], 4 : [4 - oz5 (numeric)], 5 : [5 - oz6 (numeric)], 6 : [6 - oz7 (numeric)], 7 : [7 - oz8 (numeric)], 8 : [8 - oz9 (numeric)], 9 : [9 - oz10 (numeric)], 10 : [10 - oz11 (numeric)],',\n", + " 'format': 'ARFF',\n", + " 'name': 'fri_c0_250_10',\n", + " 'qualities': 'AutoCorrelation : -0.19571919690361395, CfsSubsetEval_DecisionStumpAUC : nan, CfsSubsetEval_DecisionStumpErrRate : nan, CfsSubsetEval_DecisionStumpKappa : nan, CfsSubsetEval_NaiveBayesAUC : nan, CfsSubsetEval_NaiveBayesErrRate : nan, CfsSubsetEval_NaiveBayesKappa : nan, CfsSubsetEval_kNN1NAUC : nan, CfsSubsetEval_kNN1NErrRate : nan, CfsSubsetEval_kNN1NKappa : nan, ClassEntropy : nan, DecisionStumpAUC : nan, DecisionStumpErrRate : nan, DecisionStumpKappa : nan, Dimensionality : 0.044, EquivalentNumberOfAtts : nan, J48.00001.AUC : nan, J48.00001.ErrRate : nan, J48.00001.Kappa : nan, J48.0001.AUC : nan, J48.0001.ErrRate : nan, J48.0001.Kappa : nan, J48.001.AUC : nan, J48.001.ErrRate : nan, J48.001.Kappa : nan, MajorityClassPercentage : nan, MajorityClassSize : nan, MaxAttributeEntropy : nan, MaxKurtosisOfNumericAtts : -0.49539348492487134, MaxMeansOfNumericAtts : 9.80799973432367e-10, MaxMutualInformation : nan, MaxNominalAttDistinctValues : nan, MaxSkewnessOfNumericAtts : 0.12543681897161996, MaxStdDevOfNumericAtts : 1.0000000034055463, MeanAttributeEntropy : nan, MeanKurtosisOfNumericAtts : -1.091995458400567, MeanMeansOfNumericAtts : -5.895709083386154e-10, MeanMutualInformation : nan, MeanNoiseToSignalRatio : nan, MeanNominalAttDistinctValues : nan, MeanSkewnessOfNumericAtts : -0.020703234788118627, MeanStdDevOfNumericAtts : 0.9999999992112348, MinAttributeEntropy : nan, MinKurtosisOfNumericAtts : -1.2642171132325368, MinMeansOfNumericAtts : -1.7864000378953195e-09, MinMutualInformation : nan, MinNominalAttDistinctValues : nan, MinSkewnessOfNumericAtts : -0.15693574724850204, MinStdDevOfNumericAtts : 0.9999999965931693, MinorityClassPercentage : nan, MinorityClassSize : nan, NaiveBayesAUC : nan, NaiveBayesErrRate : nan, NaiveBayesKappa : nan, NumberOfBinaryFeatures : 0.0, NumberOfClasses : 0.0, NumberOfFeatures : 11.0, NumberOfInstances : 250.0, NumberOfInstancesWithMissingValues : 0.0, NumberOfMissingValues : 0.0, NumberOfNumericFeatures : 11.0, NumberOfSymbolicFeatures : 0.0, PercentageOfBinaryFeatures : 0.0, PercentageOfInstancesWithMissingValues : 0.0, PercentageOfMissingValues : 0.0, PercentageOfNumericFeatures : 100.0, PercentageOfSymbolicFeatures : 0.0, Quartile1AttributeEntropy : nan, Quartile1KurtosisOfNumericAtts : -1.1866639579972689, Quartile1MeansOfNumericAtts : -1.351999991627828e-09, Quartile1MutualInformation : nan, Quartile1SkewnessOfNumericAtts : -0.07245627680151549, Quartile1StdDevOfNumericAtts : 0.9999999975463832, Quartile2AttributeEntropy : nan, Quartile2KurtosisOfNumericAtts : -1.1448645319602913, Quartile2MeansOfNumericAtts : -8.391999681833796e-10, Quartile2MutualInformation : nan, Quartile2SkewnessOfNumericAtts : -0.03516477227398804, Quartile2StdDevOfNumericAtts : 0.99999999894645, Quartile3AttributeEntropy : nan, Quartile3KurtosisOfNumericAtts : -1.0684550808914777, Quartile3MeansOfNumericAtts : 1.7840003208036136e-10, Quartile3MutualInformation : nan, Quartile3SkewnessOfNumericAtts : 0.062462108917709154, Quartile3StdDevOfNumericAtts : 0.9999999999553407, REPTreeDepth1AUC : nan, REPTreeDepth1ErrRate : nan, REPTreeDepth1Kappa : nan, REPTreeDepth2AUC : nan, REPTreeDepth2ErrRate : nan, REPTreeDepth2Kappa : nan, REPTreeDepth3AUC : nan, REPTreeDepth3ErrRate : nan, REPTreeDepth3Kappa : nan, RandomTreeDepth1AUC : nan, RandomTreeDepth1ErrRate : nan, RandomTreeDepth1Kappa : nan, RandomTreeDepth2AUC : nan, RandomTreeDepth2ErrRate : nan, RandomTreeDepth2Kappa : nan, RandomTreeDepth3AUC : nan, RandomTreeDepth3ErrRate : nan, RandomTreeDepth3Kappa : nan, StdvNominalAttDistinctValues : nan, kNN1NAUC : nan, kNN1NErrRate : nan, kNN1NKappa : nan,',\n", + " 'status': 'active',\n", + " 'uploader': 2,\n", + " 'version': 1},\n", + " {'NumberOfClasses': 0.0,\n", + " 'NumberOfFeatures': 51.0,\n", + " 'NumberOfInstances': 100.0,\n", + " 'NumberOfInstancesWithMissingValues': 0.0,\n", + " 'NumberOfMissingValues': 0.0,\n", + " 'NumberOfNumericFeatures': 51.0,\n", + " 'NumberOfSymbolicFeatures': 0.0,\n", + " 'Unnamed: 0': 489,\n", + " 'description': '**Author**: \\n**Source**: Unknown - Date unknown \\n**Please cite**: \\n\\nThe Friedman datasets are 80 artificially generated datasets originating from: J.H. Friedman (1999). Stochastic Gradient Boosting\\n\\nThe dataset names are coded as \"fri_colinearintydegree_samplenumber_featurenumber\".\\n\\nFriedman is the one of the most used functions for data generation (Friedman, 1999). Friedman functions include both linear and non-linear relations between input and output, and a normalized noise (e) is added to the output. The Friedman function is as follows: \\n\\ny=10*sin(pi*x1*x2)+20*(x3-0.5)^2=10*X4+5*X5+e\\n\\nIn the original Friedman function, there are 5 features for input. To measure the effects of non-related features, additional features are added to the datasets. These added features are independent from the output. However, to measure the algorithm\\'s robustness to the colinearity, the datasets are generated with 5 different colinearity degrees. The colinearity degrees is the number of features depending on other features. \\n\\nThe generated Friedman dataset\\'s parameters and values are given below: \\nThe number of features: 5 10 25 50 100 (only the first 5 features are related to the output. The rest are completely random)\\nThe number of samples: 100 250 500 1000\\nColinearity degrees: 0 1 2 3 4\\nFor the datasets with colinearity degree 4, the numbers of features are generated as 10, 25, 50 and 100.\\nThe other datasets have 5, 10, 25 and 50 features.\\n\\nAs a result, 80 artificial datasets are generated by (4 different feature number * 4 different sample number * 5 different colinearity degree)\\n\\nThe last attribute in each file is the target.',\n", + " 'did': 636,\n", + " 'features': '0 : [0 - oz1 (numeric)], 1 : [1 - oz2 (numeric)], 2 : [2 - oz3 (numeric)], 3 : [3 - oz4 (numeric)], 4 : [4 - oz5 (numeric)], 5 : [5 - oz6 (numeric)], 6 : [6 - oz7 (numeric)], 7 : [7 - oz8 (numeric)], 8 : [8 - oz9 (numeric)], 9 : [9 - oz10 (numeric)], 10 : [10 - oz11 (numeric)], 11 : [11 - oz12 (numeric)], 12 : [12 - oz13 (numeric)], 13 : [13 - oz14 (numeric)], 14 : [14 - oz15 (numeric)], 15 : [15 - oz16 (numeric)], 16 : [16 - oz17 (numeric)], 17 : [17 - oz18 (numeric)], 18 : [18 - oz19 (numeric)], 19 : [19 - oz20 (numeric)], 20 : [20 - oz21 (numeric)], 21 : [21 - oz22 (numeric)], 22 : [22 - oz23 (numeric)], 23 : [23 - oz24 (numeric)], 24 : [24 - oz25 (numeric)], 25 : [25 - oz26 (numeric)], 26 : [26 - oz27 (numeric)], 27 : [27 - oz28 (numeric)], 28 : [28 - oz29 (numeric)], 29 : [29 - oz30 (numeric)], 30 : [30 - oz31 (numeric)], 31 : [31 - oz32 (numeric)], 32 : [32 - oz33 (numeric)], 33 : [33 - oz34 (numeric)], 34 : [34 - oz35 (numeric)], 35 : [35 - oz36 (numeric)], 36 : [36 - oz37 (numeric)], 37 : [37 - oz38 (numeric)], 38 : [38 - oz39 (numeric)], 39 : [39 - oz40 (numeric)], 40 : [40 - oz41 (numeric)], 41 : [41 - oz42 (numeric)], 42 : [42 - oz43 (numeric)], 43 : [43 - oz44 (numeric)], 44 : [44 - oz45 (numeric)], 45 : [45 - oz46 (numeric)], 46 : [46 - oz47 (numeric)], 47 : [47 - oz48 (numeric)], 48 : [48 - oz49 (numeric)], 49 : [49 - oz50 (numeric)], 50 : [50 - oz51 (numeric)],',\n", + " 'format': 'ARFF',\n", + " 'name': 'fri_c1_100_50',\n", + " 'qualities': 'AutoCorrelation : -0.15358441155555566, CfsSubsetEval_DecisionStumpAUC : nan, CfsSubsetEval_DecisionStumpErrRate : nan, CfsSubsetEval_DecisionStumpKappa : nan, CfsSubsetEval_NaiveBayesAUC : nan, CfsSubsetEval_NaiveBayesErrRate : nan, CfsSubsetEval_NaiveBayesKappa : nan, CfsSubsetEval_kNN1NAUC : nan, CfsSubsetEval_kNN1NErrRate : nan, CfsSubsetEval_kNN1NKappa : nan, ClassEntropy : nan, DecisionStumpAUC : nan, DecisionStumpErrRate : nan, DecisionStumpKappa : nan, Dimensionality : 0.51, EquivalentNumberOfAtts : nan, J48.00001.AUC : nan, J48.00001.ErrRate : nan, J48.00001.Kappa : nan, J48.0001.AUC : nan, J48.0001.ErrRate : nan, J48.0001.Kappa : nan, J48.001.AUC : nan, J48.001.ErrRate : nan, J48.001.Kappa : nan, MajorityClassPercentage : nan, MajorityClassSize : nan, MaxAttributeEntropy : nan, MaxKurtosisOfNumericAtts : -0.6068823154055947, MaxMeansOfNumericAtts : 3.939999986268816e-09, MaxMutualInformation : nan, MaxNominalAttDistinctValues : nan, MaxSkewnessOfNumericAtts : 0.30742283514774926, MaxStdDevOfNumericAtts : 1.0000000036509502, MeanAttributeEntropy : nan, MeanKurtosisOfNumericAtts : -1.1445225066134481, MeanMeansOfNumericAtts : 1.4522157308110365e-10, MeanMutualInformation : nan, MeanNoiseToSignalRatio : nan, MeanNominalAttDistinctValues : nan, MeanSkewnessOfNumericAtts : 0.013614947481475233, MeanStdDevOfNumericAtts : 0.9999999995473542, MinAttributeEntropy : nan, MinKurtosisOfNumericAtts : -1.4451227705200003, MinMeansOfNumericAtts : -4.590000000082028e-09, MinMutualInformation : nan, MinNominalAttDistinctValues : nan, MinSkewnessOfNumericAtts : -0.5508008053990625, MinStdDevOfNumericAtts : 0.9999999948249523, MinorityClassPercentage : nan, MinorityClassSize : nan, NaiveBayesAUC : nan, NaiveBayesErrRate : nan, NaiveBayesKappa : nan, NumberOfBinaryFeatures : 0.0, NumberOfClasses : 0.0, NumberOfFeatures : 51.0, NumberOfInstances : 100.0, NumberOfInstancesWithMissingValues : 0.0, NumberOfMissingValues : 0.0, NumberOfNumericFeatures : 51.0, NumberOfSymbolicFeatures : 0.0, PercentageOfBinaryFeatures : 0.0, PercentageOfInstancesWithMissingValues : 0.0, PercentageOfMissingValues : 0.0, PercentageOfNumericFeatures : 100.0, PercentageOfSymbolicFeatures : 0.0, Quartile1AttributeEntropy : nan, Quartile1KurtosisOfNumericAtts : -1.2376474236436446, Quartile1MeansOfNumericAtts : -1.1960000256827642e-09, Quartile1MutualInformation : nan, Quartile1SkewnessOfNumericAtts : -0.07685021807088445, Quartile1StdDevOfNumericAtts : 0.9999999979459647, Quartile2AttributeEntropy : nan, Quartile2KurtosisOfNumericAtts : -1.1771948671208612, Quartile2MeansOfNumericAtts : 1.9000002460245467e-10, Quartile2MutualInformation : nan, Quartile2SkewnessOfNumericAtts : 0.011542347915134551, Quartile2StdDevOfNumericAtts : 0.9999999997567961, Quartile3AttributeEntropy : nan, Quartile3KurtosisOfNumericAtts : -1.048683670883777, Quartile3MeansOfNumericAtts : 1.6000000073457256e-09, Quartile3MutualInformation : nan, Quartile3SkewnessOfNumericAtts : 0.11943274904046973, Quartile3StdDevOfNumericAtts : 1.0000000008933372, REPTreeDepth1AUC : nan, REPTreeDepth1ErrRate : nan, REPTreeDepth1Kappa : nan, REPTreeDepth2AUC : nan, REPTreeDepth2ErrRate : nan, REPTreeDepth2Kappa : nan, REPTreeDepth3AUC : nan, REPTreeDepth3ErrRate : nan, REPTreeDepth3Kappa : nan, RandomTreeDepth1AUC : nan, RandomTreeDepth1ErrRate : nan, RandomTreeDepth1Kappa : nan, RandomTreeDepth2AUC : nan, RandomTreeDepth2ErrRate : nan, RandomTreeDepth2Kappa : nan, RandomTreeDepth3AUC : nan, RandomTreeDepth3ErrRate : nan, RandomTreeDepth3Kappa : nan, StdvNominalAttDistinctValues : nan, kNN1NAUC : nan, kNN1NErrRate : nan, kNN1NKappa : nan,',\n", + " 'status': 'active',\n", + " 'uploader': 2,\n", + " 'version': 1},\n", + " {'NumberOfClasses': 0.0,\n", + " 'NumberOfFeatures': 51.0,\n", + " 'NumberOfInstances': 500.0,\n", + " 'NumberOfInstancesWithMissingValues': 0.0,\n", + " 'NumberOfMissingValues': 0.0,\n", + " 'NumberOfNumericFeatures': 51.0,\n", + " 'NumberOfSymbolicFeatures': 0.0,\n", + " 'Unnamed: 0': 490,\n", + " 'description': '**Author**: \\n**Source**: Unknown - Date unknown \\n**Please cite**: \\n\\nThe Friedman datasets are 80 artificially generated datasets originating from: J.H. Friedman (1999). Stochastic Gradient Boosting\\n\\nThe dataset names are coded as \"fri_colinearintydegree_samplenumber_featurenumber\".\\n\\nFriedman is the one of the most used functions for data generation (Friedman, 1999). Friedman functions include both linear and non-linear relations between input and output, and a normalized noise (e) is added to the output. The Friedman function is as follows: \\n\\ny=10*sin(pi*x1*x2)+20*(x3-0.5)^2=10*X4+5*X5+e\\n\\nIn the original Friedman function, there are 5 features for input. To measure the effects of non-related features, additional features are added to the datasets. These added features are independent from the output. However, to measure the algorithm\\'s robustness to the colinearity, the datasets are generated with 5 different colinearity degrees. The colinearity degrees is the number of features depending on other features. \\n\\nThe generated Friedman dataset\\'s parameters and values are given below: \\nThe number of features: 5 10 25 50 100 (only the first 5 features are related to the output. The rest are completely random)\\nThe number of samples: 100 250 500 1000\\nColinearity degrees: 0 1 2 3 4\\nFor the datasets with colinearity degree 4, the numbers of features are generated as 10, 25, 50 and 100.\\nThe other datasets have 5, 10, 25 and 50 features.\\n\\nAs a result, 80 artificial datasets are generated by (4 different feature number * 4 different sample number * 5 different colinearity degree)\\n\\nThe last attribute in each file is the target.',\n", + " 'did': 637,\n", + " 'features': '0 : [0 - oz1 (numeric)], 1 : [1 - oz2 (numeric)], 2 : [2 - oz3 (numeric)], 3 : [3 - oz4 (numeric)], 4 : [4 - oz5 (numeric)], 5 : [5 - oz6 (numeric)], 6 : [6 - oz7 (numeric)], 7 : [7 - oz8 (numeric)], 8 : [8 - oz9 (numeric)], 9 : [9 - oz10 (numeric)], 10 : [10 - oz11 (numeric)], 11 : [11 - oz12 (numeric)], 12 : [12 - oz13 (numeric)], 13 : [13 - oz14 (numeric)], 14 : [14 - oz15 (numeric)], 15 : [15 - oz16 (numeric)], 16 : [16 - oz17 (numeric)], 17 : [17 - oz18 (numeric)], 18 : [18 - oz19 (numeric)], 19 : [19 - oz20 (numeric)], 20 : [20 - oz21 (numeric)], 21 : [21 - oz22 (numeric)], 22 : [22 - oz23 (numeric)], 23 : [23 - oz24 (numeric)], 24 : [24 - oz25 (numeric)], 25 : [25 - oz26 (numeric)], 26 : [26 - oz27 (numeric)], 27 : [27 - oz28 (numeric)], 28 : [28 - oz29 (numeric)], 29 : [29 - oz30 (numeric)], 30 : [30 - oz31 (numeric)], 31 : [31 - oz32 (numeric)], 32 : [32 - oz33 (numeric)], 33 : [33 - oz34 (numeric)], 34 : [34 - oz35 (numeric)], 35 : [35 - oz36 (numeric)], 36 : [36 - oz37 (numeric)], 37 : [37 - oz38 (numeric)], 38 : [38 - oz39 (numeric)], 39 : [39 - oz40 (numeric)], 40 : [40 - oz41 (numeric)], 41 : [41 - oz42 (numeric)], 42 : [42 - oz43 (numeric)], 43 : [43 - oz44 (numeric)], 44 : [44 - oz45 (numeric)], 45 : [45 - oz46 (numeric)], 46 : [46 - oz47 (numeric)], 47 : [47 - oz48 (numeric)], 48 : [48 - oz49 (numeric)], 49 : [49 - oz50 (numeric)], 50 : [50 - oz51 (numeric)],',\n", + " 'format': 'ARFF',\n", + " 'name': 'fri_c1_500_50',\n", + " 'qualities': 'AutoCorrelation : -0.1029116105935867, CfsSubsetEval_DecisionStumpAUC : nan, CfsSubsetEval_DecisionStumpErrRate : nan, CfsSubsetEval_DecisionStumpKappa : nan, CfsSubsetEval_NaiveBayesAUC : nan, CfsSubsetEval_NaiveBayesErrRate : nan, CfsSubsetEval_NaiveBayesKappa : nan, CfsSubsetEval_kNN1NAUC : nan, CfsSubsetEval_kNN1NErrRate : nan, CfsSubsetEval_kNN1NKappa : nan, ClassEntropy : nan, DecisionStumpAUC : nan, DecisionStumpErrRate : nan, DecisionStumpKappa : nan, Dimensionality : 0.102, EquivalentNumberOfAtts : nan, J48.00001.AUC : nan, J48.00001.ErrRate : nan, J48.00001.Kappa : nan, J48.0001.AUC : nan, J48.0001.ErrRate : nan, J48.0001.Kappa : nan, J48.001.AUC : nan, J48.001.ErrRate : nan, J48.001.Kappa : nan, MajorityClassPercentage : nan, MajorityClassSize : nan, MaxAttributeEntropy : nan, MaxKurtosisOfNumericAtts : -0.3958006181295963, MaxMeansOfNumericAtts : 1.854199979112181e-09, MaxMutualInformation : nan, MaxNominalAttDistinctValues : nan, MaxSkewnessOfNumericAtts : 0.14393312684727844, MaxStdDevOfNumericAtts : 1.0000000025959064, MeanAttributeEntropy : nan, MeanKurtosisOfNumericAtts : -1.1642918641905624, MeanMeansOfNumericAtts : 1.2128730281619083e-10, MeanMutualInformation : nan, MeanNoiseToSignalRatio : nan, MeanNominalAttDistinctValues : nan, MeanSkewnessOfNumericAtts : -0.012760485342081898, MeanStdDevOfNumericAtts : 0.9999999998792359, MinAttributeEntropy : nan, MinKurtosisOfNumericAtts : -1.3384923344675994, MinMeansOfNumericAtts : -1.8019999661333941e-09, MinMutualInformation : nan, MinNominalAttDistinctValues : nan, MinSkewnessOfNumericAtts : -0.281036048339426, MinStdDevOfNumericAtts : 0.9999999978689819, MinorityClassPercentage : nan, MinorityClassSize : nan, NaiveBayesAUC : nan, NaiveBayesErrRate : nan, NaiveBayesKappa : nan, NumberOfBinaryFeatures : 0.0, NumberOfClasses : 0.0, NumberOfFeatures : 51.0, NumberOfInstances : 500.0, NumberOfInstancesWithMissingValues : 0.0, NumberOfMissingValues : 0.0, NumberOfNumericFeatures : 51.0, NumberOfSymbolicFeatures : 0.0, PercentageOfBinaryFeatures : 0.0, PercentageOfInstancesWithMissingValues : 0.0, PercentageOfMissingValues : 0.0, PercentageOfNumericFeatures : 100.0, PercentageOfSymbolicFeatures : 0.0, Quartile1AttributeEntropy : nan, Quartile1KurtosisOfNumericAtts : -1.223622948866781, Quartile1MeansOfNumericAtts : -4.309000054547596e-10, Quartile1MutualInformation : nan, Quartile1SkewnessOfNumericAtts : -0.06723563152489599, Quartile1StdDevOfNumericAtts : 0.9999999991271817, Quartile2AttributeEntropy : nan, Quartile2KurtosisOfNumericAtts : -1.1655706465365527, Quartile2MeansOfNumericAtts : 1.4859997721217156e-10, Quartile2MutualInformation : nan, Quartile2SkewnessOfNumericAtts : -0.013705906353867023, Quartile2StdDevOfNumericAtts : 0.9999999998569478, Quartile3AttributeEntropy : nan, Quartile3KurtosisOfNumericAtts : -1.1477087655562215, Quartile3MeansOfNumericAtts : 7.3686000745532e-10, Quartile3MutualInformation : nan, Quartile3SkewnessOfNumericAtts : 0.04372579533405131, Quartile3StdDevOfNumericAtts : 1.0000000004926681, REPTreeDepth1AUC : nan, REPTreeDepth1ErrRate : nan, REPTreeDepth1Kappa : nan, REPTreeDepth2AUC : nan, REPTreeDepth2ErrRate : nan, REPTreeDepth2Kappa : nan, REPTreeDepth3AUC : nan, REPTreeDepth3ErrRate : nan, REPTreeDepth3Kappa : nan, RandomTreeDepth1AUC : nan, RandomTreeDepth1ErrRate : nan, RandomTreeDepth1Kappa : nan, RandomTreeDepth2AUC : nan, RandomTreeDepth2ErrRate : nan, RandomTreeDepth2Kappa : nan, RandomTreeDepth3AUC : nan, RandomTreeDepth3ErrRate : nan, RandomTreeDepth3Kappa : nan, StdvNominalAttDistinctValues : nan, kNN1NAUC : nan, kNN1NErrRate : nan, kNN1NKappa : nan,',\n", + " 'status': 'active',\n", + " 'uploader': 2,\n", + " 'version': 1},\n", + " {'MajorityClassSize': 5715.0,\n", + " 'MaxNominalAttDistinctValues': 2.0,\n", + " 'MinorityClassSize': 2477.0,\n", + " 'NumberOfClasses': 2.0,\n", + " 'NumberOfFeatures': 22.0,\n", + " 'NumberOfInstances': 8192.0,\n", + " 'NumberOfInstancesWithMissingValues': 0.0,\n", + " 'NumberOfMissingValues': 0.0,\n", + " 'NumberOfNumericFeatures': 21.0,\n", + " 'NumberOfSymbolicFeatures': 1.0,\n", + " 'Unnamed: 0': 610,\n", + " 'description': \"**Author**: \\n**Source**: Unknown - Date unknown \\n**Please cite**: \\n\\nBinarized version of the original data set (see version 1). It converts the numeric target feature to a two-class nominal target feature by computing the mean and classifying all instances with a lower target value as positive ('P') and all others as negative ('N').\",\n", + " 'did': 761,\n", + " 'features': '0 : [0 - lread (numeric)], 1 : [1 - lwrite (numeric)], 2 : [2 - scall (numeric)], 3 : [3 - sread (numeric)], 4 : [4 - swrite (numeric)], 5 : [5 - fork (numeric)], 6 : [6 - exec (numeric)], 7 : [7 - rchar (numeric)], 8 : [8 - wchar (numeric)], 9 : [9 - pgout (numeric)], 10 : [10 - ppgout (numeric)], 11 : [11 - pgfree (numeric)], 12 : [12 - pgscan (numeric)], 13 : [13 - atch (numeric)], 14 : [14 - pgin (numeric)], 15 : [15 - ppgin (numeric)], 16 : [16 - pflt (numeric)], 17 : [17 - vflt (numeric)], 18 : [18 - runqsz (numeric)], 19 : [19 - freemem (numeric)], 20 : [20 - freeswap (numeric)], 21 : [21 - binaryClass (nominal)],',\n", + " 'format': 'ARFF',\n", + " 'name': 'cpu_act',\n", + " 'qualities': 'AutoCorrelation : 0.569893785862532, CfsSubsetEval_DecisionStumpAUC : 0.9063675579107315, CfsSubsetEval_DecisionStumpErrRate : 0.0908203125, CfsSubsetEval_DecisionStumpKappa : 0.7817815135582044, CfsSubsetEval_NaiveBayesAUC : 0.9063675579107315, CfsSubsetEval_NaiveBayesErrRate : 0.0908203125, CfsSubsetEval_NaiveBayesKappa : 0.7817815135582044, CfsSubsetEval_kNN1NAUC : 0.9063675579107315, CfsSubsetEval_kNN1NErrRate : 0.0908203125, CfsSubsetEval_kNN1NKappa : 0.7817815135582044, ClassEntropy : 0.8841664897438648, DecisionStumpAUC : 0.8142696181951823, DecisionStumpErrRate : 0.1429443359375, DecisionStumpKappa : 0.6521448025469263, Dimensionality : 0.002685546875, EquivalentNumberOfAtts : nan, J48.00001.AUC : 0.9007884965126232, J48.00001.ErrRate : 0.087158203125, J48.00001.Kappa : 0.7888672445074003, J48.0001.AUC : 0.9007884965126232, J48.0001.ErrRate : 0.087158203125, J48.0001.Kappa : 0.7888672445074003, J48.001.AUC : 0.9007884965126232, J48.001.ErrRate : 0.087158203125, J48.001.Kappa : 0.7888672445074003, MajorityClassPercentage : 69.76318359375, MajorityClassSize : 5715.0, MaxAttributeEntropy : nan, MaxKurtosisOfNumericAtts : 618.0754029990946, MaxMeansOfNumericAtts : 1328125.9598388672, MaxMutualInformation : nan, MaxNominalAttDistinctValues : 2.0, MaxSkewnessOfNumericAtts : 21.542019683245, MaxStdDevOfNumericAtts : 422019.42695680115, MeanAttributeEntropy : nan, MeanKurtosisOfNumericAtts : 82.69398017053769, MeanMeansOfNumericAtts : 77423.0389258975, MeanMutualInformation : nan, MeanNoiseToSignalRatio : nan, MeanNominalAttDistinctValues : 2.0, MeanSkewnessOfNumericAtts : 5.398356926130622, MeanStdDevOfNumericAtts : 38448.58530864211, MinAttributeEntropy : nan, MinKurtosisOfNumericAtts : 0.9031412644665848, MinMeansOfNumericAtts : 1.1275048828124994, MinMutualInformation : nan, MinNominalAttDistinctValues : 2.0, MinSkewnessOfNumericAtts : -0.7916644438525916, MinStdDevOfNumericAtts : 2.479493426887122, MinorityClassPercentage : 30.23681640625, MinorityClassSize : 2477.0, NaiveBayesAUC : 0.9568507715938583, NaiveBayesErrRate : 0.10498046875, NaiveBayesKappa : 0.747991928724961, NumberOfBinaryFeatures : 1.0, NumberOfClasses : 2.0, NumberOfFeatures : 22.0, NumberOfInstances : 8192.0, NumberOfInstancesWithMissingValues : 0.0, NumberOfMissingValues : 0.0, NumberOfNumericFeatures : 21.0, NumberOfSymbolicFeatures : 1.0, PercentageOfBinaryFeatures : 4.545454545454546, PercentageOfInstancesWithMissingValues : 0.0, PercentageOfMissingValues : 0.0, PercentageOfNumericFeatures : 95.45454545454545, PercentageOfSymbolicFeatures : 4.545454545454546, Quartile1AttributeEntropy : nan, Quartile1KurtosisOfNumericAtts : 4.347867554757658, Quartile1MeansOfNumericAtts : 7.12759460449219, Quartile1MutualInformation : nan, Quartile1SkewnessOfNumericAtts : 2.02862189623975, Quartile1StdDevOfNumericAtts : 14.544784181911057, Quartile2AttributeEntropy : nan, Quartile2KurtosisOfNumericAtts : 22.821806534075247, Quartile2MeansOfNumericAtts : 19.630676269531282, Quartile2MutualInformation : nan, Quartile2SkewnessOfNumericAtts : 4.069237707552649, Quartile2StdDevOfNumericAtts : 71.1413402583838, Quartile3AttributeEntropy : nan, Quartile3KurtosisOfNumericAtts : 69.61173013955492, Quartile3MeansOfNumericAtts : 986.9681396484375, Quartile3MutualInformation : nan, Quartile3SkewnessOfNumericAtts : 5.636440553258243, Quartile3StdDevOfNumericAtts : 916.2987340329522, REPTreeDepth1AUC : 0.9439130463960475, REPTreeDepth1ErrRate : 0.0888671875, REPTreeDepth1Kappa : 0.7850288992712985, REPTreeDepth2AUC : 0.9439130463960475, REPTreeDepth2ErrRate : 0.0888671875, REPTreeDepth2Kappa : 0.7850288992712985, REPTreeDepth3AUC : 0.9439130463960475, REPTreeDepth3ErrRate : 0.0888671875, REPTreeDepth3Kappa : 0.7850288992712985, RandomTreeDepth1AUC : 0.8805606505484755, RandomTreeDepth1ErrRate : 0.1005859375, RandomTreeDepth1Kappa : 0.7614696522066133, RandomTreeDepth2AUC : 0.8805606505484755, RandomTreeDepth2ErrRate : 0.1005859375, RandomTreeDepth2Kappa : 0.7614696522066133, RandomTreeDepth3AUC : 0.8805606505484755, RandomTreeDepth3ErrRate : 0.1005859375, RandomTreeDepth3Kappa : 0.7614696522066133, StdvNominalAttDistinctValues : 0.0, kNN1NAUC : 0.8873110128492718, kNN1NErrRate : 0.0902099609375, kNN1NKappa : 0.7836743818275687,',\n", + " 'status': 'active',\n", + " 'uploader': 2,\n", + " 'version': 3},\n", + " {'MajorityClassSize': 563.0,\n", + " 'MaxNominalAttDistinctValues': 2.0,\n", + " 'MinorityClassSize': 437.0,\n", + " 'NumberOfClasses': 2.0,\n", + " 'NumberOfFeatures': 6.0,\n", + " 'NumberOfInstances': 1000.0,\n", + " 'NumberOfInstancesWithMissingValues': 0.0,\n", + " 'NumberOfMissingValues': 0.0,\n", + " 'NumberOfNumericFeatures': 5.0,\n", + " 'NumberOfSymbolicFeatures': 1.0,\n", + " 'Unnamed: 0': 660,\n", + " 'description': \"**Author**: \\n**Source**: Unknown - Date unknown \\n**Please cite**: \\n\\nBinarized version of the original data set (see version 1). It converts the numeric target feature to a two-class nominal target feature by computing the mean and classifying all instances with a lower target value as positive ('P') and all others as negative ('N').\",\n", + " 'did': 813,\n", + " 'features': '0 : [0 - oz1 (numeric)], 1 : [1 - oz2 (numeric)], 2 : [2 - oz3 (numeric)], 3 : [3 - oz4 (numeric)], 4 : [4 - oz5 (numeric)], 5 : [5 - binaryClass (nominal)],',\n", + " 'format': 'ARFF',\n", + " 'name': 'fri_c3_1000_5',\n", + " 'qualities': 'AutoCorrelation : 0.5085085085085085, CfsSubsetEval_DecisionStumpAUC : 0.8395669651385395, CfsSubsetEval_DecisionStumpErrRate : 0.166, CfsSubsetEval_DecisionStumpKappa : 0.6636775843135982, CfsSubsetEval_NaiveBayesAUC : 0.8395669651385395, CfsSubsetEval_NaiveBayesErrRate : 0.166, CfsSubsetEval_NaiveBayesKappa : 0.6636775843135982, CfsSubsetEval_kNN1NAUC : 0.8395669651385395, CfsSubsetEval_kNN1NErrRate : 0.166, CfsSubsetEval_kNN1NKappa : 0.6636775843135982, ClassEntropy : 0.9885173903891564, DecisionStumpAUC : 0.6816478411257116, DecisionStumpErrRate : 0.307, DecisionStumpKappa : 0.38413763219977204, Dimensionality : 0.006, EquivalentNumberOfAtts : nan, J48.00001.AUC : 0.8689982156720089, J48.00001.ErrRate : 0.141, J48.00001.Kappa : 0.7123458704291555, J48.0001.AUC : 0.8689982156720089, J48.0001.ErrRate : 0.141, J48.0001.Kappa : 0.7123458704291555, J48.001.AUC : 0.8689982156720089, J48.001.ErrRate : 0.141, J48.001.Kappa : 0.7123458704291555, MajorityClassPercentage : 56.3, MajorityClassSize : 563.0, MaxAttributeEntropy : nan, MaxKurtosisOfNumericAtts : 1.153622378636951, MaxMeansOfNumericAtts : 9.000000004810715e-09, MaxMutualInformation : nan, MaxNominalAttDistinctValues : 2.0, MaxSkewnessOfNumericAtts : 1.246941780251248, MaxStdDevOfNumericAtts : 1.0000000357900796, MeanAttributeEntropy : nan, MeanKurtosisOfNumericAtts : -0.39534744957352824, MeanMeansOfNumericAtts : 2.0000000069686498e-09, MeanMutualInformation : nan, MeanNoiseToSignalRatio : nan, MeanNominalAttDistinctValues : 2.0, MeanSkewnessOfNumericAtts : 0.3799051670286494, MeanStdDevOfNumericAtts : 1.0000000300225595, MinAttributeEntropy : nan, MinKurtosisOfNumericAtts : -1.199653149643545, MinMeansOfNumericAtts : -4.0000000260942415e-09, MinMutualInformation : nan, MinNominalAttDistinctValues : 2.0, MinSkewnessOfNumericAtts : -0.047776570555339945, MinStdDevOfNumericAtts : 1.0000000249286416, MinorityClassPercentage : 43.7, MinorityClassSize : 437.0, NaiveBayesAUC : 0.7034560685442078, NaiveBayesErrRate : 0.331, NaiveBayesKappa : 0.3247268305819183, NumberOfBinaryFeatures : 1.0, NumberOfClasses : 2.0, NumberOfFeatures : 6.0, NumberOfInstances : 1000.0, NumberOfInstancesWithMissingValues : 0.0, NumberOfMissingValues : 0.0, NumberOfNumericFeatures : 5.0, NumberOfSymbolicFeatures : 1.0, PercentageOfBinaryFeatures : 16.666666666666664, PercentageOfInstancesWithMissingValues : 0.0, PercentageOfMissingValues : 0.0, PercentageOfNumericFeatures : 83.33333333333334, PercentageOfSymbolicFeatures : 16.666666666666664, Quartile1AttributeEntropy : nan, Quartile1KurtosisOfNumericAtts : -1.1837277136220274, Quartile1MeansOfNumericAtts : -2.4999999939656625e-09, Quartile1MutualInformation : nan, Quartile1SkewnessOfNumericAtts : -0.040956945030882735, Quartile1StdDevOfNumericAtts : 1.0000000252149224, Quartile2AttributeEntropy : nan, Quartile2KurtosisOfNumericAtts : -0.7857435131844928, Quartile2MeansOfNumericAtts : 3.9968028886505634e-17, Quartile2MutualInformation : nan, Quartile2SkewnessOfNumericAtts : 0.041091208226431544, Quartile2StdDevOfNumericAtts : 1.0000000316528863, Quartile3AttributeEntropy : nan, Quartile3KurtosisOfNumericAtts : 0.5882308462804533, Quartile3MeansOfNumericAtts : 7.499999991403272e-09, Quartile3MutualInformation : nan, Quartile3SkewnessOfNumericAtts : 0.9701742584892905, Quartile3StdDevOfNumericAtts : 1.0000000340150332, REPTreeDepth1AUC : 0.8576500522291906, REPTreeDepth1ErrRate : 0.155, REPTreeDepth1Kappa : 0.6850796849984152, REPTreeDepth2AUC : 0.8576500522291906, REPTreeDepth2ErrRate : 0.155, REPTreeDepth2Kappa : 0.6850796849984152, REPTreeDepth3AUC : 0.8576500522291906, REPTreeDepth3ErrRate : 0.155, REPTreeDepth3Kappa : 0.6850796849984152, RandomTreeDepth1AUC : 0.8483483788628262, RandomTreeDepth1ErrRate : 0.15, RandomTreeDepth1Kappa : 0.6956280006006273, RandomTreeDepth2AUC : 0.8483483788628262, RandomTreeDepth2ErrRate : 0.15, RandomTreeDepth2Kappa : 0.6956280006006273, RandomTreeDepth3AUC : 0.8483483788628262, RandomTreeDepth3ErrRate : 0.15, RandomTreeDepth3Kappa : 0.6956280006006273, StdvNominalAttDistinctValues : 0.0, kNN1NAUC : 0.8667749186078177, kNN1NErrRate : 0.133, kNN1NKappa : 0.730743068152371,',\n", + " 'status': 'active',\n", + " 'uploader': 2,\n", + " 'version': 2}],\n", + " 'documents': ['**Author**: \\n**Source**: Unknown - \\n**Please cite**: \\n\\nCitation Request:\\n This breast cancer domain was obtained from the University Medical Centre,\\n Institute of Oncology, Ljubljana, Yugoslavia. Thanks go to M. Zwitter and \\n M. Soklic for providing the data. Please include this citation if you plan\\n to use this database.\\n \\n 1. Title: Breast cancer data (Michalski has used this)\\n \\n 2. Sources: \\n -- Matjaz Zwitter & Milan Soklic (physicians)\\n Institute of Oncology \\n University Medical Center\\n Ljubljana, Yugoslavia\\n -- Donors: Ming Tan and Jeff Schlimmer (Jeffrey.Schlimmer@a.gp.cs.cmu.edu)\\n -- Date: 11 July 1988\\n \\n 3. Past Usage: (Several: here are some)\\n -- Michalski,R.S., Mozetic,I., Hong,J., & Lavrac,N. (1986). The \\n Multi-Purpose Incremental Learning System AQ15 and its Testing \\n Application to Three Medical Domains. In Proceedings of the \\n Fifth National Conference on Artificial Intelligence, 1041-1045,\\n Philadelphia, PA: Morgan Kaufmann.\\n -- accuracy range: 66%-72%\\n -- Clark,P. & Niblett,T. (1987). Induction in Noisy Domains. In \\n Progress in Machine Learning (from the Proceedings of the 2nd\\n European Working Session on Learning), 11-30, Bled, \\n Yugoslavia: Sigma Press.\\n -- 8 test results given: 65%-72% accuracy range\\n -- Tan, M., & Eshelman, L. (1988). Using weighted networks to \\n represent classification knowledge in noisy domains. Proceedings \\n of the Fifth International Conference on Machine Learning, 121-134,\\n Ann Arbor, MI.\\n -- 4 systems tested: accuracy range was 68%-73.5%\\n -- Cestnik,G., Konenenko,I, & Bratko,I. (1987). Assistant-86: A\\n Knowledge-Elicitation Tool for Sophisticated Users. In I.Bratko\\n & N.Lavrac (Eds.) Progress in Machine Learning, 31-45, Sigma Press.\\n -- Assistant-86: 78% accuracy\\n \\n 4. Relevant Information:\\n This is one of three domains provided by the Oncology Institute\\n that has repeatedly appeared in the machine learning literature.\\n (See also lymphography and primary-tumor.)\\n \\n This data set includes 201 instances of one class and 85 instances of\\n another class. The instances are described by 9 attributes, some of\\n which are linear and some are nominal.\\n \\n 5. Number of Instances: 286\\n \\n 6. Number of Attributes: 9 + the class attribute\\n \\n 7. Attribute Information:\\n 1. Class: no-recurrence-events, recurrence-events\\n 2. age: 10-19, 20-29, 30-39, 40-49, 50-59, 60-69, 70-79, 80-89, 90-99.\\n 3. menopause: lt40, ge40, premeno.\\n 4. tumor-size: 0-4, 5-9, 10-14, 15-19, 20-24, 25-29, 30-34, 35-39, 40-44,\\n 45-49, 50-54, 55-59.\\n 5. inv-nodes: 0-2, 3-5, 6-8, 9-11, 12-14, 15-17, 18-20, 21-23, 24-26,\\n 27-29, 30-32, 33-35, 36-39.\\n 6. node-caps: yes, no.\\n 7. deg-malig: 1, 2, 3.\\n 8. breast: left, right.\\n 9. breast-quad: left-up, left-low, right-up, right-low, central.\\n 10. irradiat: yes, no.\\n \\n 8. Missing Attribute Values: (denoted by \"?\")\\n Attribute #: Number of instances with missing values:\\n 6. 8\\n 9. 1.\\n \\n 9. Class Distribution:\\n 1. no-recurrence-events: 201 instances\\n 2. recurrence-events: 85 instances\\n\\n Num Instances: 286\\n Num Attributes: 10\\n Num Continuous: 0 (Int 0 / Real 0)\\n Num Discrete: 10\\n Missing values: 9 / 0.3%\\n\\n name type enum ints real missing distinct (1)\\n 1 \\'age\\' Enum 100% 0% 0% 0 / 0% 6 / 2% 0% \\n 2 \\'menopause\\' Enum 100% 0% 0% 0 / 0% 3 / 1% 0% \\n 3 \\'tumor-size\\' Enum 100% 0% 0% 0 / 0% 11 / 4% 0% \\n 4 \\'inv-nodes\\' Enum 100% 0% 0% 0 / 0% 7 / 2% 0% \\n 5 \\'node-caps\\' Enum 97% 0% 0% 8 / 3% 2 / 1% 0% \\n 6 \\'deg-malig\\' Enum 100% 0% 0% 0 / 0% 3 / 1% 0% \\n 7 \\'breast\\' Enum 100% 0% 0% 0 / 0% 2 / 1% 0% \\n 8 \\'breast-quad\\' Enum 100% 0% 0% 1 / 0% 5 / 2% 0% \\n 9 \\'irradiat\\' Enum 100% 0% 0% 0 / 0% 2 / 1% 0% \\n 10 \\'Class\\' Enum 100% 0% 0% 0 / 0% 2 / 1% 0%',\n", + " 'nan',\n", + " 'nan',\n", + " 'nan',\n", + " 'nan',\n", + " 'nan',\n", + " 'nan',\n", + " 'nan',\n", + " 'nan',\n", + " 'nan',\n", + " '**Author**: Yoram Reich\",\"Steven J. Fenves \\n \\n**Source**: [original](http://openml.org/d/19) - \\n**Please cite**: \\n\\nPittsburgh bridges \\n\\nThis version is derived from version 2 (the discretized version) by removing all instances with missing values in the last (target) attribute. The bridges dataset is originally not a classification dataset, put is used so extensively in the literature, using the last attribute as the target attribute. However, this attribute has missing values, which may lead to confusing benchmarking result. Therefore, these instances have been removed. \\n\\nSources: \\n-- Yoram Reich and Steven J. Fenves Department of Civil Engineering and Engineering Design Research Center Carnegie Mellon University Pittsburgh, PA 15213 Compiled from various sources. \\n-- Date: 1 August 1990 \\n\\nAttribute Information: The type field state whether a property is continuous/integer (c) or nominal (n). For properties with c,n type, the range of continuous numbers is given first and the possible values of the nominal follow the semi-colon. \\n\\nname type possible values comments \\n------------------------------------------------------------------------ \\n1. IDENTIF - - identifier of the examples \\n2. RIVER n A, M, O \\n3. LOCATION n 1 to 52 \\n4. ERECTED c,n 1818-1986 - CRAFTS, EMERGING, MATURE, MODERN \\n5. PURPOSE n WALK, AQUEDUCT, RR, HIGHWAY \\n6. LENGTH c,n 804-4558 - SHORT, MEDIUM, LONG \\n7. LANES c,n 1, 2, 4, 6 - 1, 2, 4, 6 \\n8. CLEAR-G n N, G \\n9. T-OR-D n THROUGH, DECK \\n10. MATERIAL n WOOD, IRON, STEEL \\n11. SPAN n SHORT, MEDIUM, LONG \\n12. REL-L n S, S-F, F \\n13. TYPE n WOOD, SUSPEN, SIMPLE-T, ARCH, CANTILEV, CONT-T',\n", + " '**Author**: \\n**Source**: Unknown - Date unknown \\n**Please cite**: \\n\\nanalcatdata A collection of data sets used in the book \"Analyzing Categorical Data,\"\\nby Jeffrey S. Simonoff, Springer-Verlag, New York, 2003. The submission\\nconsists of a zip file containing two versions of each of 84 data sets,\\nplus this README file. Each data set is given in comma-delimited ASCII\\n(.csv) form, and Microsoft Excel (.xls) form.\\n\\nNOTICE: These data sets may be used freely for scientific, educational and/or\\nnoncommercial purposes, provided suitable acknowledgment is given (by citing\\nthe above-named reference).\\n\\nFurther details concerning the book, including information on statistical software\\n(including sample S-PLUS/R and SAS code), are available at the web site\\n\\nhttp://www.stern.nyu.edu/~jsimonof/AnalCatData\\n\\n\\nInformation about the dataset\\nCLASSTYPE: numeric\\nCLASSINDEX: last\\n\\n\\nNote: Quotes, Single-Quotes and Backslashes were removed, Blanks replaced\\nwith Underscores',\n", + " '**Author**: \\n**Source**: Unknown - Date unknown \\n**Please cite**: \\n\\nThe Friedman datasets are 80 artificially generated datasets originating from: J.H. Friedman (1999). Stochastic Gradient Boosting\\n\\nThe dataset names are coded as \"fri_colinearintydegree_samplenumber_featurenumber\".\\n\\nFriedman is the one of the most used functions for data generation (Friedman, 1999). Friedman functions include both linear and non-linear relations between input and output, and a normalized noise (e) is added to the output. The Friedman function is as follows: \\n\\ny=10*sin(pi*x1*x2)+20*(x3-0.5)^2=10*X4+5*X5+e\\n\\nIn the original Friedman function, there are 5 features for input. To measure the effects of non-related features, additional features are added to the datasets. These added features are independent from the output. However, to measure the algorithm\\'s robustness to the colinearity, the datasets are generated with 5 different colinearity degrees. The colinearity degrees is the number of features depending on other features. \\n\\nThe generated Friedman dataset\\'s parameters and values are given below: \\nThe number of features: 5 10 25 50 100 (only the first 5 features are related to the output. The rest are completely random)\\nThe number of samples: 100 250 500 1000\\nColinearity degrees: 0 1 2 3 4\\nFor the datasets with colinearity degree 4, the numbers of features are generated as 10, 25, 50 and 100.\\nThe other datasets have 5, 10, 25 and 50 features.\\n\\nAs a result, 80 artificial datasets are generated by (4 different feature number * 4 different sample number * 5 different colinearity degree)\\n\\nThe last attribute in each file is the target.',\n", + " '**Author**: \\n**Source**: Unknown - Date unknown \\n**Please cite**: \\n\\nThe Friedman datasets are 80 artificially generated datasets originating from: J.H. Friedman (1999). Stochastic Gradient Boosting\\n\\nThe dataset names are coded as \"fri_colinearintydegree_samplenumber_featurenumber\".\\n\\nFriedman is the one of the most used functions for data generation (Friedman, 1999). Friedman functions include both linear and non-linear relations between input and output, and a normalized noise (e) is added to the output. The Friedman function is as follows: \\n\\ny=10*sin(pi*x1*x2)+20*(x3-0.5)^2=10*X4+5*X5+e\\n\\nIn the original Friedman function, there are 5 features for input. To measure the effects of non-related features, additional features are added to the datasets. These added features are independent from the output. However, to measure the algorithm\\'s robustness to the colinearity, the datasets are generated with 5 different colinearity degrees. The colinearity degrees is the number of features depending on other features. \\n\\nThe generated Friedman dataset\\'s parameters and values are given below: \\nThe number of features: 5 10 25 50 100 (only the first 5 features are related to the output. The rest are completely random)\\nThe number of samples: 100 250 500 1000\\nColinearity degrees: 0 1 2 3 4\\nFor the datasets with colinearity degree 4, the numbers of features are generated as 10, 25, 50 and 100.\\nThe other datasets have 5, 10, 25 and 50 features.\\n\\nAs a result, 80 artificial datasets are generated by (4 different feature number * 4 different sample number * 5 different colinearity degree)\\n\\nThe last attribute in each file is the target.',\n", + " '**Author**: \\n**Source**: Unknown - Date unknown \\n**Please cite**: \\n\\nThe Friedman datasets are 80 artificially generated datasets originating from: J.H. Friedman (1999). Stochastic Gradient Boosting\\n\\nThe dataset names are coded as \"fri_colinearintydegree_samplenumber_featurenumber\".\\n\\nFriedman is the one of the most used functions for data generation (Friedman, 1999). Friedman functions include both linear and non-linear relations between input and output, and a normalized noise (e) is added to the output. The Friedman function is as follows: \\n\\ny=10*sin(pi*x1*x2)+20*(x3-0.5)^2=10*X4+5*X5+e\\n\\nIn the original Friedman function, there are 5 features for input. To measure the effects of non-related features, additional features are added to the datasets. These added features are independent from the output. However, to measure the algorithm\\'s robustness to the colinearity, the datasets are generated with 5 different colinearity degrees. The colinearity degrees is the number of features depending on other features. \\n\\nThe generated Friedman dataset\\'s parameters and values are given below: \\nThe number of features: 5 10 25 50 100 (only the first 5 features are related to the output. The rest are completely random)\\nThe number of samples: 100 250 500 1000\\nColinearity degrees: 0 1 2 3 4\\nFor the datasets with colinearity degree 4, the numbers of features are generated as 10, 25, 50 and 100.\\nThe other datasets have 5, 10, 25 and 50 features.\\n\\nAs a result, 80 artificial datasets are generated by (4 different feature number * 4 different sample number * 5 different colinearity degree)\\n\\nThe last attribute in each file is the target.',\n", + " '**Author**: \\n**Source**: Unknown - Date unknown \\n**Please cite**: \\n\\nThe Friedman datasets are 80 artificially generated datasets originating from: J.H. Friedman (1999). Stochastic Gradient Boosting\\n\\nThe dataset names are coded as \"fri_colinearintydegree_samplenumber_featurenumber\".\\n\\nFriedman is the one of the most used functions for data generation (Friedman, 1999). Friedman functions include both linear and non-linear relations between input and output, and a normalized noise (e) is added to the output. The Friedman function is as follows: \\n\\ny=10*sin(pi*x1*x2)+20*(x3-0.5)^2=10*X4+5*X5+e\\n\\nIn the original Friedman function, there are 5 features for input. To measure the effects of non-related features, additional features are added to the datasets. These added features are independent from the output. However, to measure the algorithm\\'s robustness to the colinearity, the datasets are generated with 5 different colinearity degrees. The colinearity degrees is the number of features depending on other features. \\n\\nThe generated Friedman dataset\\'s parameters and values are given below: \\nThe number of features: 5 10 25 50 100 (only the first 5 features are related to the output. The rest are completely random)\\nThe number of samples: 100 250 500 1000\\nColinearity degrees: 0 1 2 3 4\\nFor the datasets with colinearity degree 4, the numbers of features are generated as 10, 25, 50 and 100.\\nThe other datasets have 5, 10, 25 and 50 features.\\n\\nAs a result, 80 artificial datasets are generated by (4 different feature number * 4 different sample number * 5 different colinearity degree)\\n\\nThe last attribute in each file is the target.',\n", + " '**Author**: \\n**Source**: Unknown - Date unknown \\n**Please cite**: \\n\\nThe Friedman datasets are 80 artificially generated datasets originating from: J.H. Friedman (1999). Stochastic Gradient Boosting\\n\\nThe dataset names are coded as \"fri_colinearintydegree_samplenumber_featurenumber\".\\n\\nFriedman is the one of the most used functions for data generation (Friedman, 1999). Friedman functions include both linear and non-linear relations between input and output, and a normalized noise (e) is added to the output. The Friedman function is as follows: \\n\\ny=10*sin(pi*x1*x2)+20*(x3-0.5)^2=10*X4+5*X5+e\\n\\nIn the original Friedman function, there are 5 features for input. To measure the effects of non-related features, additional features are added to the datasets. These added features are independent from the output. However, to measure the algorithm\\'s robustness to the colinearity, the datasets are generated with 5 different colinearity degrees. The colinearity degrees is the number of features depending on other features. \\n\\nThe generated Friedman dataset\\'s parameters and values are given below: \\nThe number of features: 5 10 25 50 100 (only the first 5 features are related to the output. The rest are completely random)\\nThe number of samples: 100 250 500 1000\\nColinearity degrees: 0 1 2 3 4\\nFor the datasets with colinearity degree 4, the numbers of features are generated as 10, 25, 50 and 100.\\nThe other datasets have 5, 10, 25 and 50 features.\\n\\nAs a result, 80 artificial datasets are generated by (4 different feature number * 4 different sample number * 5 different colinearity degree)\\n\\nThe last attribute in each file is the target.',\n", + " '**Author**: \\n**Source**: Unknown - Date unknown \\n**Please cite**: \\n\\nThe Friedman datasets are 80 artificially generated datasets originating from: J.H. Friedman (1999). Stochastic Gradient Boosting\\n\\nThe dataset names are coded as \"fri_colinearintydegree_samplenumber_featurenumber\".\\n\\nFriedman is the one of the most used functions for data generation (Friedman, 1999). Friedman functions include both linear and non-linear relations between input and output, and a normalized noise (e) is added to the output. The Friedman function is as follows: \\n\\ny=10*sin(pi*x1*x2)+20*(x3-0.5)^2=10*X4+5*X5+e\\n\\nIn the original Friedman function, there are 5 features for input. To measure the effects of non-related features, additional features are added to the datasets. These added features are independent from the output. However, to measure the algorithm\\'s robustness to the colinearity, the datasets are generated with 5 different colinearity degrees. The colinearity degrees is the number of features depending on other features. \\n\\nThe generated Friedman dataset\\'s parameters and values are given below: \\nThe number of features: 5 10 25 50 100 (only the first 5 features are related to the output. The rest are completely random)\\nThe number of samples: 100 250 500 1000\\nColinearity degrees: 0 1 2 3 4\\nFor the datasets with colinearity degree 4, the numbers of features are generated as 10, 25, 50 and 100.\\nThe other datasets have 5, 10, 25 and 50 features.\\n\\nAs a result, 80 artificial datasets are generated by (4 different feature number * 4 different sample number * 5 different colinearity degree)\\n\\nThe last attribute in each file is the target.',\n", + " '**Author**: \\n**Source**: Unknown - Date unknown \\n**Please cite**: \\n\\nThe Friedman datasets are 80 artificially generated datasets originating from: J.H. Friedman (1999). Stochastic Gradient Boosting\\n\\nThe dataset names are coded as \"fri_colinearintydegree_samplenumber_featurenumber\".\\n\\nFriedman is the one of the most used functions for data generation (Friedman, 1999). Friedman functions include both linear and non-linear relations between input and output, and a normalized noise (e) is added to the output. The Friedman function is as follows: \\n\\ny=10*sin(pi*x1*x2)+20*(x3-0.5)^2=10*X4+5*X5+e\\n\\nIn the original Friedman function, there are 5 features for input. To measure the effects of non-related features, additional features are added to the datasets. These added features are independent from the output. However, to measure the algorithm\\'s robustness to the colinearity, the datasets are generated with 5 different colinearity degrees. The colinearity degrees is the number of features depending on other features. \\n\\nThe generated Friedman dataset\\'s parameters and values are given below: \\nThe number of features: 5 10 25 50 100 (only the first 5 features are related to the output. The rest are completely random)\\nThe number of samples: 100 250 500 1000\\nColinearity degrees: 0 1 2 3 4\\nFor the datasets with colinearity degree 4, the numbers of features are generated as 10, 25, 50 and 100.\\nThe other datasets have 5, 10, 25 and 50 features.\\n\\nAs a result, 80 artificial datasets are generated by (4 different feature number * 4 different sample number * 5 different colinearity degree)\\n\\nThe last attribute in each file is the target.',\n", + " '**Author**: \\n**Source**: Unknown - Date unknown \\n**Please cite**: \\n\\nThe Friedman datasets are 80 artificially generated datasets originating from: J.H. Friedman (1999). Stochastic Gradient Boosting\\n\\nThe dataset names are coded as \"fri_colinearintydegree_samplenumber_featurenumber\".\\n\\nFriedman is the one of the most used functions for data generation (Friedman, 1999). Friedman functions include both linear and non-linear relations between input and output, and a normalized noise (e) is added to the output. The Friedman function is as follows: \\n\\ny=10*sin(pi*x1*x2)+20*(x3-0.5)^2=10*X4+5*X5+e\\n\\nIn the original Friedman function, there are 5 features for input. To measure the effects of non-related features, additional features are added to the datasets. These added features are independent from the output. However, to measure the algorithm\\'s robustness to the colinearity, the datasets are generated with 5 different colinearity degrees. The colinearity degrees is the number of features depending on other features. \\n\\nThe generated Friedman dataset\\'s parameters and values are given below: \\nThe number of features: 5 10 25 50 100 (only the first 5 features are related to the output. The rest are completely random)\\nThe number of samples: 100 250 500 1000\\nColinearity degrees: 0 1 2 3 4\\nFor the datasets with colinearity degree 4, the numbers of features are generated as 10, 25, 50 and 100.\\nThe other datasets have 5, 10, 25 and 50 features.\\n\\nAs a result, 80 artificial datasets are generated by (4 different feature number * 4 different sample number * 5 different colinearity degree)\\n\\nThe last attribute in each file is the target.',\n", + " '**Author**: \\n**Source**: Unknown - Date unknown \\n**Please cite**: \\n\\nThe Friedman datasets are 80 artificially generated datasets originating from: J.H. Friedman (1999). Stochastic Gradient Boosting\\n\\nThe dataset names are coded as \"fri_colinearintydegree_samplenumber_featurenumber\".\\n\\nFriedman is the one of the most used functions for data generation (Friedman, 1999). Friedman functions include both linear and non-linear relations between input and output, and a normalized noise (e) is added to the output. The Friedman function is as follows: \\n\\ny=10*sin(pi*x1*x2)+20*(x3-0.5)^2=10*X4+5*X5+e\\n\\nIn the original Friedman function, there are 5 features for input. To measure the effects of non-related features, additional features are added to the datasets. These added features are independent from the output. However, to measure the algorithm\\'s robustness to the colinearity, the datasets are generated with 5 different colinearity degrees. The colinearity degrees is the number of features depending on other features. \\n\\nThe generated Friedman dataset\\'s parameters and values are given below: \\nThe number of features: 5 10 25 50 100 (only the first 5 features are related to the output. The rest are completely random)\\nThe number of samples: 100 250 500 1000\\nColinearity degrees: 0 1 2 3 4\\nFor the datasets with colinearity degree 4, the numbers of features are generated as 10, 25, 50 and 100.\\nThe other datasets have 5, 10, 25 and 50 features.\\n\\nAs a result, 80 artificial datasets are generated by (4 different feature number * 4 different sample number * 5 different colinearity degree)\\n\\nThe last attribute in each file is the target.',\n", + " '**Author**: \\n**Source**: Unknown - Date unknown \\n**Please cite**: \\n\\nThe Friedman datasets are 80 artificially generated datasets originating from: J.H. Friedman (1999). Stochastic Gradient Boosting\\n\\nThe dataset names are coded as \"fri_colinearintydegree_samplenumber_featurenumber\".\\n\\nFriedman is the one of the most used functions for data generation (Friedman, 1999). Friedman functions include both linear and non-linear relations between input and output, and a normalized noise (e) is added to the output. The Friedman function is as follows: \\n\\ny=10*sin(pi*x1*x2)+20*(x3-0.5)^2=10*X4+5*X5+e\\n\\nIn the original Friedman function, there are 5 features for input. To measure the effects of non-related features, additional features are added to the datasets. These added features are independent from the output. However, to measure the algorithm\\'s robustness to the colinearity, the datasets are generated with 5 different colinearity degrees. The colinearity degrees is the number of features depending on other features. \\n\\nThe generated Friedman dataset\\'s parameters and values are given below: \\nThe number of features: 5 10 25 50 100 (only the first 5 features are related to the output. The rest are completely random)\\nThe number of samples: 100 250 500 1000\\nColinearity degrees: 0 1 2 3 4\\nFor the datasets with colinearity degree 4, the numbers of features are generated as 10, 25, 50 and 100.\\nThe other datasets have 5, 10, 25 and 50 features.\\n\\nAs a result, 80 artificial datasets are generated by (4 different feature number * 4 different sample number * 5 different colinearity degree)\\n\\nThe last attribute in each file is the target.',\n", + " '**Author**: \\n**Source**: Unknown - Date unknown \\n**Please cite**: \\n\\nThe Friedman datasets are 80 artificially generated datasets originating from: J.H. Friedman (1999). Stochastic Gradient Boosting\\n\\nThe dataset names are coded as \"fri_colinearintydegree_samplenumber_featurenumber\".\\n\\nFriedman is the one of the most used functions for data generation (Friedman, 1999). Friedman functions include both linear and non-linear relations between input and output, and a normalized noise (e) is added to the output. The Friedman function is as follows: \\n\\ny=10*sin(pi*x1*x2)+20*(x3-0.5)^2=10*X4+5*X5+e\\n\\nIn the original Friedman function, there are 5 features for input. To measure the effects of non-related features, additional features are added to the datasets. These added features are independent from the output. However, to measure the algorithm\\'s robustness to the colinearity, the datasets are generated with 5 different colinearity degrees. The colinearity degrees is the number of features depending on other features. \\n\\nThe generated Friedman dataset\\'s parameters and values are given below: \\nThe number of features: 5 10 25 50 100 (only the first 5 features are related to the output. The rest are completely random)\\nThe number of samples: 100 250 500 1000\\nColinearity degrees: 0 1 2 3 4\\nFor the datasets with colinearity degree 4, the numbers of features are generated as 10, 25, 50 and 100.\\nThe other datasets have 5, 10, 25 and 50 features.\\n\\nAs a result, 80 artificial datasets are generated by (4 different feature number * 4 different sample number * 5 different colinearity degree)\\n\\nThe last attribute in each file is the target.',\n", + " '**Author**: \\n**Source**: Unknown - Date unknown \\n**Please cite**: \\n\\nThe Friedman datasets are 80 artificially generated datasets originating from: J.H. Friedman (1999). Stochastic Gradient Boosting\\n\\nThe dataset names are coded as \"fri_colinearintydegree_samplenumber_featurenumber\".\\n\\nFriedman is the one of the most used functions for data generation (Friedman, 1999). Friedman functions include both linear and non-linear relations between input and output, and a normalized noise (e) is added to the output. The Friedman function is as follows: \\n\\ny=10*sin(pi*x1*x2)+20*(x3-0.5)^2=10*X4+5*X5+e\\n\\nIn the original Friedman function, there are 5 features for input. To measure the effects of non-related features, additional features are added to the datasets. These added features are independent from the output. However, to measure the algorithm\\'s robustness to the colinearity, the datasets are generated with 5 different colinearity degrees. The colinearity degrees is the number of features depending on other features. \\n\\nThe generated Friedman dataset\\'s parameters and values are given below: \\nThe number of features: 5 10 25 50 100 (only the first 5 features are related to the output. The rest are completely random)\\nThe number of samples: 100 250 500 1000\\nColinearity degrees: 0 1 2 3 4\\nFor the datasets with colinearity degree 4, the numbers of features are generated as 10, 25, 50 and 100.\\nThe other datasets have 5, 10, 25 and 50 features.\\n\\nAs a result, 80 artificial datasets are generated by (4 different feature number * 4 different sample number * 5 different colinearity degree)\\n\\nThe last attribute in each file is the target.',\n", + " \"**Author**: \\n**Source**: Unknown - Date unknown \\n**Please cite**: \\n\\nBinarized version of the original data set (see version 1). It converts the numeric target feature to a two-class nominal target feature by computing the mean and classifying all instances with a lower target value as positive ('P') and all others as negative ('N').\",\n", + " \"**Author**: \\n**Source**: Unknown - Date unknown \\n**Please cite**: \\n\\nBinarized version of the original data set (see version 1). It converts the numeric target feature to a two-class nominal target feature by computing the mean and classifying all instances with a lower target value as positive ('P') and all others as negative ('N').\"],\n", + " 'uris': None,\n", + " 'data': None}" + ] + }, + "execution_count": 38, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "database_filter = collec.get(ids = ids, where=filter_condition)\n", + "database_filter" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'ids': ['13',\n", + " '245',\n", + " '246',\n", + " '249',\n", + " '250',\n", + " '253',\n", + " '254',\n", + " '255',\n", + " '257',\n", + " '258',\n", + " '328',\n", + " '506',\n", + " '623',\n", + " '624',\n", + " '626',\n", + " '627',\n", + " '628',\n", + " '629',\n", + " '632',\n", + " '633',\n", + " '634',\n", + " '635',\n", + " '636',\n", + " '637',\n", + " '761',\n", + " '813'],\n", + " 'embeddings': None,\n", + " 'metadatas': [{'MajorityClassSize': 201.0,\n", + " 'MaxNominalAttDistinctValues': 11.0,\n", + " 'MinorityClassSize': 85.0,\n", + " 'NumberOfClasses': 2.0,\n", + " 'NumberOfFeatures': 10.0,\n", + " 'NumberOfInstances': 286.0,\n", + " 'NumberOfInstancesWithMissingValues': 9.0,\n", + " 'NumberOfMissingValues': 9.0,\n", + " 'NumberOfNumericFeatures': 0.0,\n", + " 'NumberOfSymbolicFeatures': 10.0,\n", + " 'Unnamed: 0': 11,\n", + " 'description': '**Author**: \\n**Source**: Unknown - \\n**Please cite**: \\n\\nCitation Request:\\n This breast cancer domain was obtained from the University Medical Centre,\\n Institute of Oncology, Ljubljana, Yugoslavia. Thanks go to M. Zwitter and \\n M. Soklic for providing the data. Please include this citation if you plan\\n to use this database.\\n \\n 1. Title: Breast cancer data (Michalski has used this)\\n \\n 2. Sources: \\n -- Matjaz Zwitter & Milan Soklic (physicians)\\n Institute of Oncology \\n University Medical Center\\n Ljubljana, Yugoslavia\\n -- Donors: Ming Tan and Jeff Schlimmer (Jeffrey.Schlimmer@a.gp.cs.cmu.edu)\\n -- Date: 11 July 1988\\n \\n 3. Past Usage: (Several: here are some)\\n -- Michalski,R.S., Mozetic,I., Hong,J., & Lavrac,N. (1986). The \\n Multi-Purpose Incremental Learning System AQ15 and its Testing \\n Application to Three Medical Domains. In Proceedings of the \\n Fifth National Conference on Artificial Intelligence, 1041-1045,\\n Philadelphia, PA: Morgan Kaufmann.\\n -- accuracy range: 66%-72%\\n -- Clark,P. & Niblett,T. (1987). Induction in Noisy Domains. In \\n Progress in Machine Learning (from the Proceedings of the 2nd\\n European Working Session on Learning), 11-30, Bled, \\n Yugoslavia: Sigma Press.\\n -- 8 test results given: 65%-72% accuracy range\\n -- Tan, M., & Eshelman, L. (1988). Using weighted networks to \\n represent classification knowledge in noisy domains. Proceedings \\n of the Fifth International Conference on Machine Learning, 121-134,\\n Ann Arbor, MI.\\n -- 4 systems tested: accuracy range was 68%-73.5%\\n -- Cestnik,G., Konenenko,I, & Bratko,I. (1987). Assistant-86: A\\n Knowledge-Elicitation Tool for Sophisticated Users. In I.Bratko\\n & N.Lavrac (Eds.) Progress in Machine Learning, 31-45, Sigma Press.\\n -- Assistant-86: 78% accuracy\\n \\n 4. Relevant Information:\\n This is one of three domains provided by the Oncology Institute\\n that has repeatedly appeared in the machine learning literature.\\n (See also lymphography and primary-tumor.)\\n \\n This data set includes 201 instances of one class and 85 instances of\\n another class. The instances are described by 9 attributes, some of\\n which are linear and some are nominal.\\n \\n 5. Number of Instances: 286\\n \\n 6. Number of Attributes: 9 + the class attribute\\n \\n 7. Attribute Information:\\n 1. Class: no-recurrence-events, recurrence-events\\n 2. age: 10-19, 20-29, 30-39, 40-49, 50-59, 60-69, 70-79, 80-89, 90-99.\\n 3. menopause: lt40, ge40, premeno.\\n 4. tumor-size: 0-4, 5-9, 10-14, 15-19, 20-24, 25-29, 30-34, 35-39, 40-44,\\n 45-49, 50-54, 55-59.\\n 5. inv-nodes: 0-2, 3-5, 6-8, 9-11, 12-14, 15-17, 18-20, 21-23, 24-26,\\n 27-29, 30-32, 33-35, 36-39.\\n 6. node-caps: yes, no.\\n 7. deg-malig: 1, 2, 3.\\n 8. breast: left, right.\\n 9. breast-quad: left-up, left-low, right-up, right-low, central.\\n 10. irradiat: yes, no.\\n \\n 8. Missing Attribute Values: (denoted by \"?\")\\n Attribute #: Number of instances with missing values:\\n 6. 8\\n 9. 1.\\n \\n 9. Class Distribution:\\n 1. no-recurrence-events: 201 instances\\n 2. recurrence-events: 85 instances\\n\\n Num Instances: 286\\n Num Attributes: 10\\n Num Continuous: 0 (Int 0 / Real 0)\\n Num Discrete: 10\\n Missing values: 9 / 0.3%\\n\\n name type enum ints real missing distinct (1)\\n 1 \\'age\\' Enum 100% 0% 0% 0 / 0% 6 / 2% 0% \\n 2 \\'menopause\\' Enum 100% 0% 0% 0 / 0% 3 / 1% 0% \\n 3 \\'tumor-size\\' Enum 100% 0% 0% 0 / 0% 11 / 4% 0% \\n 4 \\'inv-nodes\\' Enum 100% 0% 0% 0 / 0% 7 / 2% 0% \\n 5 \\'node-caps\\' Enum 97% 0% 0% 8 / 3% 2 / 1% 0% \\n 6 \\'deg-malig\\' Enum 100% 0% 0% 0 / 0% 3 / 1% 0% \\n 7 \\'breast\\' Enum 100% 0% 0% 0 / 0% 2 / 1% 0% \\n 8 \\'breast-quad\\' Enum 100% 0% 0% 1 / 0% 5 / 2% 0% \\n 9 \\'irradiat\\' Enum 100% 0% 0% 0 / 0% 2 / 1% 0% \\n 10 \\'Class\\' Enum 100% 0% 0% 0 / 0% 2 / 1% 0%',\n", + " 'did': 13,\n", + " 'features': '0 : [0 - age (nominal)], 1 : [1 - menopause (nominal)], 2 : [2 - tumor-size (nominal)], 3 : [3 - inv-nodes (nominal)], 4 : [4 - node-caps (nominal)], 5 : [5 - deg-malig (nominal)], 6 : [6 - breast (nominal)], 7 : [7 - breast-quad (nominal)], 8 : [8 - irradiat (nominal)], 9 : [9 - Class (nominal)],',\n", + " 'format': 'ARFF',\n", + " 'name': 'breast-cancer',\n", + " 'qualities': 'AutoCorrelation : 0.5684210526315789, CfsSubsetEval_DecisionStumpAUC : 0.6203687445127305, CfsSubsetEval_DecisionStumpErrRate : 0.2972027972027972, CfsSubsetEval_DecisionStumpKappa : 0.2557099993876677, CfsSubsetEval_NaiveBayesAUC : 0.6780841780402799, CfsSubsetEval_NaiveBayesErrRate : 0.2727272727272727, CfsSubsetEval_NaiveBayesKappa : 0.27311827956989254, CfsSubsetEval_kNN1NAUC : 0.6306409130816506, CfsSubsetEval_kNN1NErrRate : 0.3041958041958042, CfsSubsetEval_kNN1NKappa : 0.15407629020194483, ClassEntropy : 0.8778446951746506, DecisionStumpAUC : 0.6203687445127305, DecisionStumpErrRate : 0.2972027972027972, DecisionStumpKappa : 0.2557099993876677, Dimensionality : 0.03496503496503497, EquivalentNumberOfAtts : 26.01097249370415, J48.00001.AUC : 0.4958150424348844, J48.00001.ErrRate : 0.2972027972027972, J48.00001.Kappa : 0.0, J48.0001.AUC : 0.4958150424348844, J48.0001.ErrRate : 0.2972027972027972, J48.0001.Kappa : 0.0, J48.001.AUC : 0.4958150424348844, J48.001.ErrRate : 0.2972027972027972, J48.001.Kappa : 0.0, MajorityClassPercentage : 70.27972027972028, MajorityClassSize : 201.0, MaxAttributeEntropy : 3.0243614350456793, MaxKurtosisOfNumericAtts : nan, MaxMeansOfNumericAtts : nan, MaxMutualInformation : 0.07700985251661, MaxNominalAttDistinctValues : 11.0, MaxSkewnessOfNumericAtts : nan, MaxStdDevOfNumericAtts : nan, MeanAttributeEntropy : 1.5119033534490414, MeanKurtosisOfNumericAtts : nan, MeanMeansOfNumericAtts : nan, MeanMutualInformation : 0.033749014781632225, MeanNoiseToSignalRatio : 43.798444139231265, MeanNominalAttDistinctValues : 4.3, MeanSkewnessOfNumericAtts : nan, MeanStdDevOfNumericAtts : nan, MinAttributeEntropy : 0.7670030768842513, MinKurtosisOfNumericAtts : nan, MinMeansOfNumericAtts : nan, MinMutualInformation : 0.00200161497371, MinNominalAttDistinctValues : 2.0, MinSkewnessOfNumericAtts : nan, MinStdDevOfNumericAtts : nan, MinorityClassPercentage : 29.72027972027972, MinorityClassSize : 85.0, NaiveBayesAUC : 0.6949663447468539, NaiveBayesErrRate : 0.2762237762237762, NaiveBayesKappa : 0.2827756967811567, NumberOfBinaryFeatures : 4.0, NumberOfClasses : 2.0, NumberOfFeatures : 10.0, NumberOfInstances : 286.0, NumberOfInstancesWithMissingValues : 9.0, NumberOfMissingValues : 9.0, NumberOfNumericFeatures : 0.0, NumberOfSymbolicFeatures : 10.0, PercentageOfBinaryFeatures : 40.0, PercentageOfInstancesWithMissingValues : 3.146853146853147, PercentageOfMissingValues : 0.3146853146853147, PercentageOfNumericFeatures : 0.0, PercentageOfSymbolicFeatures : 100.0, Quartile1AttributeEntropy : 0.8942199564092295, Quartile1KurtosisOfNumericAtts : nan, Quartile1MeansOfNumericAtts : nan, Quartile1MutualInformation : 0.0054919850612, Quartile1SkewnessOfNumericAtts : nan, Quartile1StdDevOfNumericAtts : nan, Quartile2AttributeEntropy : 1.3186781167901145, Quartile2KurtosisOfNumericAtts : nan, Quartile2MeansOfNumericAtts : nan, Quartile2MutualInformation : 0.02581902390914, Quartile2SkewnessOfNumericAtts : nan, Quartile2StdDevOfNumericAtts : nan, Quartile3AttributeEntropy : 2.017460690540565, Quartile3KurtosisOfNumericAtts : nan, Quartile3MeansOfNumericAtts : nan, Quartile3MutualInformation : 0.06308310670709, Quartile3SkewnessOfNumericAtts : nan, Quartile3StdDevOfNumericAtts : nan, REPTreeDepth1AUC : 0.5137254901960784, REPTreeDepth1ErrRate : 0.32867132867132864, REPTreeDepth1Kappa : -0.05073086844367987, REPTreeDepth2AUC : 0.48446005267778747, REPTreeDepth2ErrRate : 0.32517482517482516, REPTreeDepth2Kappa : -0.016509974776427364, REPTreeDepth3AUC : 0.48446005267778747, REPTreeDepth3ErrRate : 0.32517482517482516, REPTreeDepth3Kappa : -0.016509974776427364, RandomTreeDepth1AUC : 0.5874158618671349, RandomTreeDepth1ErrRate : 0.2972027972027972, RandomTreeDepth1Kappa : 0.20487996336756734, RandomTreeDepth2AUC : 0.5834942932396839, RandomTreeDepth2ErrRate : 0.2867132867132867, RandomTreeDepth2Kappa : 0.21811028872441177, RandomTreeDepth3AUC : 0.5585308750365817, RandomTreeDepth3ErrRate : 0.32867132867132864, RandomTreeDepth3Kappa : 0.15591836734693884, StdvNominalAttDistinctValues : 2.9832867780352594, kNN1NAUC : 0.5933274802458297, kNN1NErrRate : 0.3006993006993007, kNN1NKappa : 0.18626348177066082,',\n", + " 'status': 'active',\n", + " 'uploader': 1,\n", + " 'version': 1},\n", + " {'MajorityClassSize': 759652.0,\n", + " 'MaxNominalAttDistinctValues': 9.0,\n", + " 'MinorityClassSize': 555.0,\n", + " 'NumberOfClasses': 6.0,\n", + " 'NumberOfFeatures': 39.0,\n", + " 'NumberOfInstances': 1000000.0,\n", + " 'NumberOfInstancesWithMissingValues': 0.0,\n", + " 'NumberOfMissingValues': 0.0,\n", + " 'NumberOfNumericFeatures': 6.0,\n", + " 'NumberOfSymbolicFeatures': 33.0,\n", + " 'Unnamed: 0': 166,\n", + " 'did': 245,\n", + " 'features': '0 : [0 - family (nominal)], 1 : [1 - product-type (nominal)], 2 : [2 - steel (nominal)], 3 : [3 - carbon (numeric)], 4 : [4 - hardness (numeric)], 5 : [5 - temper_rolling (nominal)], 6 : [6 - condition (nominal)], 7 : [7 - formability (nominal)], 8 : [8 - strength (numeric)], 9 : [9 - non-ageing (nominal)], 10 : [10 - surface-finish (nominal)], 11 : [11 - surface-quality (nominal)], 12 : [12 - enamelability (nominal)], 13 : [13 - bc (nominal)], 14 : [14 - bf (nominal)], 15 : [15 - bt (nominal)], 16 : [16 - bw%2Fme (nominal)], 17 : [17 - bl (nominal)], 18 : [18 - m (nominal)], 19 : [19 - chrom (nominal)], 20 : [20 - phos (nominal)], 21 : [21 - cbond (nominal)], 22 : [22 - marvi (nominal)], 23 : [23 - exptl (nominal)], 24 : [24 - ferro (nominal)], 25 : [25 - corr (nominal)], 26 : [26 - blue%2Fbright%2Fvarn%2Fclean (nominal)], 27 : [27 - lustre (nominal)], 28 : [28 - jurofm (nominal)], 29 : [29 - s (nominal)], 30 : [30 - p (nominal)], 31 : [31 - shape (nominal)], 32 : [32 - thick (numeric)], 33 : [33 - width (numeric)], 34 : [34 - len (numeric)], 35 : [35 - oil (nominal)], 36 : [36 - bore (nominal)], 37 : [37 - packing (nominal)], 38 : [38 - class (nominal)],',\n", + " 'format': 'ARFF',\n", + " 'name': 'BNG(anneal.ORIG)',\n", + " 'qualities': 'AutoCorrelation : 0.5966435966435967, CfsSubsetEval_DecisionStumpAUC : 0.9100425379819419, CfsSubsetEval_DecisionStumpErrRate : 0.140681, CfsSubsetEval_DecisionStumpKappa : 0.6220031797879997, CfsSubsetEval_NaiveBayesAUC : 0.9100425379819419, CfsSubsetEval_NaiveBayesErrRate : 0.140681, CfsSubsetEval_NaiveBayesKappa : 0.6220031797879997, CfsSubsetEval_kNN1NAUC : 0.9100425379819419, CfsSubsetEval_kNN1NErrRate : 0.140681, CfsSubsetEval_kNN1NKappa : 0.6220031797879997, ClassEntropy : 1.2034178134061593, DecisionStumpAUC : 0.5879879850231294, DecisionStumpErrRate : 0.240348, DecisionStumpKappa : 0.0, Dimensionality : 3.9e-05, EquivalentNumberOfAtts : 60.11837170459382, J48.00001.AUC : 0.9448491725907372, J48.00001.ErrRate : 0.100573, J48.00001.Kappa : 0.7443968596935974, J48.0001.AUC : 0.9448491725907372, J48.0001.ErrRate : 0.100573, J48.0001.Kappa : 0.7443968596935974, J48.001.AUC : 0.9448491725907372, J48.001.ErrRate : 0.100573, J48.001.Kappa : 0.7443968596935974, MajorityClassPercentage : 75.9652, MajorityClassSize : 759652.0, MaxAttributeEntropy : 1.684380880218795, MaxKurtosisOfNumericAtts : 7.972323991673457, MaxMeansOfNumericAtts : 1303.5986793821992, MaxMutualInformation : 0.22418299169114, MaxNominalAttDistinctValues : 9.0, MaxSkewnessOfNumericAtts : 3.049930007485628, MaxStdDevOfNumericAtts : 1887.984782494524, MeanAttributeEntropy : 0.2550663392586401, MeanKurtosisOfNumericAtts : 2.8203680278627603, MeanMeansOfNumericAtts : 358.9037324354391, MeanMutualInformation : 0.02001747185235562, MeanNoiseToSignalRatio : 11.742185483758997, MeanNominalAttDistinctValues : 2.4242424242424248, MeanSkewnessOfNumericAtts : 1.7379232001015346, MeanStdDevOfNumericAtts : 412.40611146101406, MinAttributeEntropy : -0.0, MinKurtosisOfNumericAtts : -0.868382824192059, MinMeansOfNumericAtts : 1.17735989039, MinMutualInformation : 0.0, MinNominalAttDistinctValues : 1.0, MinSkewnessOfNumericAtts : 0.0777861407829806, MinStdDevOfNumericAtts : 0.8657360725686571, MinorityClassPercentage : 0.05550000000000001, MinorityClassSize : 555.0, NaiveBayesAUC : 0.8801291929503372, NaiveBayesErrRate : 0.190879, NaiveBayesKappa : 0.5362687884269597, NumberOfBinaryFeatures : 4.0, NumberOfClasses : 6.0, NumberOfFeatures : 39.0, NumberOfInstances : 1000000.0, NumberOfInstancesWithMissingValues : 0.0, NumberOfMissingValues : 0.0, NumberOfNumericFeatures : 6.0, NumberOfSymbolicFeatures : 33.0, PercentageOfBinaryFeatures : 10.256410256410255, PercentageOfInstancesWithMissingValues : 0.0, PercentageOfMissingValues : 0.0, PercentageOfNumericFeatures : 15.384615384615385, PercentageOfSymbolicFeatures : 84.61538461538461, Quartile1AttributeEntropy : 0.0, Quartile1KurtosisOfNumericAtts : -0.40342290066472575, Quartile1MeansOfNumericAtts : 4.121271656681081, Quartile1MutualInformation : 0.0, Quartile1SkewnessOfNumericAtts : 0.9465055395022927, Quartile1StdDevOfNumericAtts : 12.199935237910706, Quartile2AttributeEntropy : 0.0, Quartile2KurtosisOfNumericAtts : 1.1822680963070171, Quartile2MeansOfNumericAtts : 29.069557551355082, Quartile2MutualInformation : 0.0, Quartile2SkewnessOfNumericAtts : 1.5129572065602566, Quartile2StdDevOfNumericAtts : 81.31421932163484, Quartile3AttributeEntropy : 0.3151059368331584, Quartile3KurtosisOfNumericAtts : 7.769706297845923, Quartile3MeansOfNumericAtts : 914.9531683394675, Quartile3MutualInformation : 0.0134922075411625, Quartile3SkewnessOfNumericAtts : 3.0408549769799245, Quartile3StdDevOfNumericAtts : 777.2309780706541, REPTreeDepth1AUC : 0.9583107972293207, REPTreeDepth1ErrRate : 0.103254, REPTreeDepth1Kappa : 0.7370276932423044, REPTreeDepth2AUC : 0.9583107972293207, REPTreeDepth2ErrRate : 0.103254, REPTreeDepth2Kappa : 0.7370276932423044, REPTreeDepth3AUC : 0.9583107972293207, REPTreeDepth3ErrRate : 0.103254, REPTreeDepth3Kappa : 0.7370276932423044, RandomTreeDepth1AUC : 0.8355478147338595, RandomTreeDepth1ErrRate : 0.136256, RandomTreeDepth1Kappa : 0.6600393445396219, RandomTreeDepth2AUC : 0.8355478147338595, RandomTreeDepth2ErrRate : 0.136256, RandomTreeDepth2Kappa : 0.6600393445396219, RandomTreeDepth3AUC : 0.8355478147338595, RandomTreeDepth3ErrRate : 0.136256, RandomTreeDepth3Kappa : 0.6600393445396219, StdvNominalAttDistinctValues : 2.136444228009226, kNN1NAUC : 0.8314600775134281, kNN1NErrRate : 0.13295, kNN1NKappa : 0.6662701843447909,',\n", + " 'status': 'active',\n", + " 'uploader': 1,\n", + " 'version': 2},\n", + " {'MajorityClassSize': 647000.0,\n", + " 'MaxNominalAttDistinctValues': 3.0,\n", + " 'MinorityClassSize': 353000.0,\n", + " 'NumberOfClasses': 2.0,\n", + " 'NumberOfFeatures': 17.0,\n", + " 'NumberOfInstances': 1000000.0,\n", + " 'NumberOfInstancesWithMissingValues': 0.0,\n", + " 'NumberOfMissingValues': 0.0,\n", + " 'NumberOfNumericFeatures': 8.0,\n", + " 'NumberOfSymbolicFeatures': 9.0,\n", + " 'Unnamed: 0': 167,\n", + " 'did': 246,\n", + " 'features': '0 : [0 - duration (numeric)], 1 : [1 - wage-increase-first-year (numeric)], 2 : [2 - wage-increase-second-year (numeric)], 3 : [3 - wage-increase-third-year (numeric)], 4 : [4 - cost-of-living-adjustment (nominal)], 5 : [5 - working-hours (numeric)], 6 : [6 - pension (nominal)], 7 : [7 - standby-pay (numeric)], 8 : [8 - shift-differential (numeric)], 9 : [9 - education-allowance (nominal)], 10 : [10 - statutory-holidays (numeric)], 11 : [11 - vacation (nominal)], 12 : [12 - longterm-disability-assistance (nominal)], 13 : [13 - contribution-to-dental-plan (nominal)], 14 : [14 - bereavement-assistance (nominal)], 15 : [15 - contribution-to-health-plan (nominal)], 16 : [16 - class (nominal)],',\n", + " 'format': 'ARFF',\n", + " 'name': 'BNG(labor)',\n", + " 'qualities': 'AutoCorrelation : 0.5437515437515438, CfsSubsetEval_DecisionStumpAUC : 0.9656760571782601, CfsSubsetEval_DecisionStumpErrRate : 0.073896, CfsSubsetEval_DecisionStumpKappa : 0.8361860872136263, CfsSubsetEval_NaiveBayesAUC : 0.9656760571782601, CfsSubsetEval_NaiveBayesErrRate : 0.073896, CfsSubsetEval_NaiveBayesKappa : 0.8361860872136263, CfsSubsetEval_kNN1NAUC : 0.9656760571782601, CfsSubsetEval_kNN1NErrRate : 0.073896, CfsSubsetEval_kNN1NKappa : 0.8361860872136263, ClassEntropy : 0.9367188103082079, DecisionStumpAUC : 0.7610196570793069, DecisionStumpErrRate : 0.17252, DecisionStumpKappa : 0.58038171156978, Dimensionality : 1.7e-05, EquivalentNumberOfAtts : 7.968568094624322, J48.00001.AUC : 0.9722429466178615, J48.00001.ErrRate : 0.04276, J48.00001.Kappa : 0.9060997150754404, J48.0001.AUC : 0.9722429466178615, J48.0001.ErrRate : 0.04276, J48.0001.Kappa : 0.9060997150754404, J48.001.AUC : 0.9722429466178615, J48.001.ErrRate : 0.04276, J48.001.Kappa : 0.9060997150754404, MajorityClassPercentage : 64.7, MajorityClassSize : 647000.0, MaxAttributeEntropy : 1.5686613437902412, MaxKurtosisOfNumericAtts : 12.97183970122256, MaxMeansOfNumericAtts : 38.207419791592, MaxMutualInformation : 0.32803097149722, MaxNominalAttDistinctValues : 3.0, MaxSkewnessOfNumericAtts : 3.219955909368285, MaxStdDevOfNumericAtts : 3.8364066013681923, MeanAttributeEntropy : 1.030150865706383, MeanKurtosisOfNumericAtts : 2.8964429932042943, MeanMeansOfNumericAtts : 8.968741018983373, MeanMutualInformation : 0.117551710568945, MeanNoiseToSignalRatio : 7.763384732731654, MeanNominalAttDistinctValues : 2.5555555555555554, MeanSkewnessOfNumericAtts : 0.34601419937813327, MeanStdDevOfNumericAtts : 1.891451954866257, MinAttributeEntropy : 0.3890238905472041, MinKurtosisOfNumericAtts : -0.971191333863191, MinMeansOfNumericAtts : 2.151549000000004, MinMutualInformation : 0.00031099636463, MinNominalAttDistinctValues : 2.0, MinSkewnessOfNumericAtts : -2.492429789250301, MinStdDevOfNumericAtts : 0.7025670033463544, MinorityClassPercentage : 35.3, MinorityClassSize : 353000.0, NaiveBayesAUC : 0.9555395606219708, NaiveBayesErrRate : 0.094793, NaiveBayesKappa : 0.7925457977917215, NumberOfBinaryFeatures : 4.0, NumberOfClasses : 2.0, NumberOfFeatures : 17.0, NumberOfInstances : 1000000.0, NumberOfInstancesWithMissingValues : 0.0, NumberOfMissingValues : 0.0, NumberOfNumericFeatures : 8.0, NumberOfSymbolicFeatures : 9.0, PercentageOfBinaryFeatures : 23.52941176470588, PercentageOfInstancesWithMissingValues : 0.0, PercentageOfMissingValues : 0.0, PercentageOfNumericFeatures : 47.05882352941176, PercentageOfSymbolicFeatures : 52.94117647058824, Quartile1AttributeEntropy : 0.6564960185674868, Quartile1KurtosisOfNumericAtts : -0.4230542944993998, Quartile1MeansOfNumericAtts : 3.4821577465324913, Quartile1MutualInformation : 0.0406841268226125, Quartile1SkewnessOfNumericAtts : -1.281488718852625, Quartile1StdDevOfNumericAtts : 0.9407666804578398, Quartile2AttributeEntropy : 1.1218408001470537, Quartile2KurtosisOfNumericAtts : 1.7537742946990273, Quartile2MeansOfNumericAtts : 4.105879276327004, Quartile2MutualInformation : 0.082880156145935, Quartile2SkewnessOfNumericAtts : 0.3087406103320252, Quartile2StdDevOfNumericAtts : 1.3717497404114476, Quartile3AttributeEntropy : 1.3559623783139811, Quartile3KurtosisOfNumericAtts : 4.405602715826151, Quartile3MeansOfNumericAtts : 9.656747548797744, Quartile3MutualInformation : 0.1902258165988925, Quartile3SkewnessOfNumericAtts : 1.9521046933055897, Quartile3StdDevOfNumericAtts : 3.1629904450022495, REPTreeDepth1AUC : 0.9835080135775928, REPTreeDepth1ErrRate : 0.044215, REPTreeDepth1Kappa : 0.9028913293511758, REPTreeDepth2AUC : 0.9835080135775928, REPTreeDepth2ErrRate : 0.044215, REPTreeDepth2Kappa : 0.9028913293511758, REPTreeDepth3AUC : 0.9835080135775928, REPTreeDepth3ErrRate : 0.044215, REPTreeDepth3Kappa : 0.9028913293511758, RandomTreeDepth1AUC : 0.9368127614485684, RandomTreeDepth1ErrRate : 0.057895, RandomTreeDepth1Kappa : 0.8732579824692726, RandomTreeDepth2AUC : 0.9368127614485684, RandomTreeDepth2ErrRate : 0.057895, RandomTreeDepth2Kappa : 0.8732579824692726, RandomTreeDepth3AUC : 0.9368127614485684, RandomTreeDepth3ErrRate : 0.057895, RandomTreeDepth3Kappa : 0.8732579824692726, StdvNominalAttDistinctValues : 0.5270462766947299, kNN1NAUC : 0.9369440921752609, kNN1NErrRate : 0.057355, kNN1NKappa : 0.8743678466255509,',\n", + " 'status': 'active',\n", + " 'uploader': 1,\n", + " 'version': 2},\n", + " {'MajorityClassSize': 543495.0,\n", + " 'MaxNominalAttDistinctValues': 8.0,\n", + " 'MinorityClassSize': 16508.0,\n", + " 'NumberOfClasses': 4.0,\n", + " 'NumberOfFeatures': 19.0,\n", + " 'NumberOfInstances': 1000000.0,\n", + " 'NumberOfInstancesWithMissingValues': 0.0,\n", + " 'NumberOfMissingValues': 0.0,\n", + " 'NumberOfNumericFeatures': 3.0,\n", + " 'NumberOfSymbolicFeatures': 16.0,\n", + " 'Unnamed: 0': 170,\n", + " 'did': 249,\n", + " 'features': '0 : [0 - lymphatics (nominal)], 1 : [1 - block_of_affere (nominal)], 2 : [2 - bl_of_lymph_c (nominal)], 3 : [3 - bl_of_lymph_s (nominal)], 4 : [4 - by_pass (nominal)], 5 : [5 - extravasates (nominal)], 6 : [6 - regeneration_of (nominal)], 7 : [7 - early_uptake_in (nominal)], 8 : [8 - lym_nodes_dimin (numeric)], 9 : [9 - lym_nodes_enlar (numeric)], 10 : [10 - changes_in_lym (nominal)], 11 : [11 - defect_in_node (nominal)], 12 : [12 - changes_in_node (nominal)], 13 : [13 - changes_in_stru (nominal)], 14 : [14 - special_forms (nominal)], 15 : [15 - dislocation_of (nominal)], 16 : [16 - exclusion_of_no (nominal)], 17 : [17 - no_of_nodes_in (numeric)], 18 : [18 - class (nominal)],',\n", + " 'format': 'ARFF',\n", + " 'name': 'BNG(lymph)',\n", + " 'qualities': 'AutoCorrelation : 0.46374446374446376, CfsSubsetEval_DecisionStumpAUC : 0.9525893765375607, CfsSubsetEval_DecisionStumpErrRate : 0.106959, CfsSubsetEval_DecisionStumpKappa : 0.7987920883644265, CfsSubsetEval_NaiveBayesAUC : 0.9525893765375607, CfsSubsetEval_NaiveBayesErrRate : 0.106959, CfsSubsetEval_NaiveBayesKappa : 0.7987920883644265, CfsSubsetEval_kNN1NAUC : 0.9525893765375607, CfsSubsetEval_kNN1NErrRate : 0.106959, CfsSubsetEval_kNN1NKappa : 0.7987920883644265, ClassEntropy : 1.2562056674968567, DecisionStumpAUC : 0.7532346598100107, DecisionStumpErrRate : 0.273471, DecisionStumpKappa : 0.4788451693464286, Dimensionality : 1.9e-05, EquivalentNumberOfAtts : 15.260742108126458, J48.00001.AUC : 0.9618119207180185, J48.00001.ErrRate : 0.08244, J48.00001.Kappa : 0.8451590629533324, J48.0001.AUC : 0.9618119207180185, J48.0001.ErrRate : 0.08244, J48.0001.Kappa : 0.8451590629533324, J48.001.AUC : 0.9618119207180185, J48.001.ErrRate : 0.08244, J48.001.Kappa : 0.8451590629533324, MajorityClassPercentage : 54.34949999999999, MajorityClassSize : 543495.0, MaxAttributeEntropy : 2.723239634537262, MaxKurtosisOfNumericAtts : 18.75326070429752, MaxMeansOfNumericAtts : 2.6134477578170023, MaxMutualInformation : 0.26343480307106, MaxNominalAttDistinctValues : 8.0, MaxSkewnessOfNumericAtts : 4.411711376693278, MaxStdDevOfNumericAtts : 1.8756448367786893, MeanAttributeEntropy : 1.1603538163656784, MeanKurtosisOfNumericAtts : 6.157034389713749, MeanMeansOfNumericAtts : 2.0712172526056696, MeanMutualInformation : 0.08231615858496934, MeanNoiseToSignalRatio : 13.096306682823695, MeanNominalAttDistinctValues : 3.0, MeanSkewnessOfNumericAtts : 1.9564480044895443, MeanStdDevOfNumericAtts : 1.0314658867612234, MinAttributeEntropy : 0.3452749744006563, MinKurtosisOfNumericAtts : -0.6385294023659185, MinMeansOfNumericAtts : 1.0865320000000074, MinMutualInformation : 0.00885303321242, MinNominalAttDistinctValues : 2.0, MinSkewnessOfNumericAtts : 0.2818333847334242, MinStdDevOfNumericAtts : 0.3679732984584612, MinorityClassPercentage : 1.6507999999999998, MinorityClassSize : 16508.0, NaiveBayesAUC : 0.9468313845335228, NaiveBayesErrRate : 0.130285, NaiveBayesKappa : 0.7587133376199122, NumberOfBinaryFeatures : 9.0, NumberOfClasses : 4.0, NumberOfFeatures : 19.0, NumberOfInstances : 1000000.0, NumberOfInstancesWithMissingValues : 0.0, NumberOfMissingValues : 0.0, NumberOfNumericFeatures : 3.0, NumberOfSymbolicFeatures : 16.0, PercentageOfBinaryFeatures : 47.368421052631575, PercentageOfInstancesWithMissingValues : 0.0, PercentageOfMissingValues : 0.0, PercentageOfNumericFeatures : 15.789473684210526, PercentageOfSymbolicFeatures : 84.21052631578947, Quartile1AttributeEntropy : 0.7877261423146888, Quartile1KurtosisOfNumericAtts : -0.6385294023659185, Quartile1MeansOfNumericAtts : 1.0865320000000074, Quartile1MutualInformation : 0.03861619480877, Quartile1SkewnessOfNumericAtts : 0.2818333847334242, Quartile1StdDevOfNumericAtts : 0.3679732984584612, Quartile2AttributeEntropy : 0.9913699371659555, Quartile2KurtosisOfNumericAtts : 0.35637186720964387, Quartile2MeansOfNumericAtts : 2.5136719999999984, Quartile2MutualInformation : 0.06535685954581, Quartile2SkewnessOfNumericAtts : 1.1757992520419303, Quartile2StdDevOfNumericAtts : 0.8507795250465198, Quartile3AttributeEntropy : 1.6373342149481895, Quartile3KurtosisOfNumericAtts : 18.75326070429752, Quartile3MeansOfNumericAtts : 2.6134477578170023, Quartile3MutualInformation : 0.11813432242576, Quartile3SkewnessOfNumericAtts : 4.411711376693278, Quartile3StdDevOfNumericAtts : 1.8756448367786893, REPTreeDepth1AUC : 0.9654312871259095, REPTreeDepth1ErrRate : 0.089084, REPTreeDepth1Kappa : 0.8325292469524697, REPTreeDepth2AUC : 0.9654312871259095, REPTreeDepth2ErrRate : 0.089084, REPTreeDepth2Kappa : 0.8325292469524697, REPTreeDepth3AUC : 0.9654312871259095, REPTreeDepth3ErrRate : 0.089084, REPTreeDepth3Kappa : 0.8325292469524697, RandomTreeDepth1AUC : 0.9148261423353723, RandomTreeDepth1ErrRate : 0.10737, RandomTreeDepth1Kappa : 0.7986168101743286, RandomTreeDepth2AUC : 0.9148261423353723, RandomTreeDepth2ErrRate : 0.10737, RandomTreeDepth2Kappa : 0.7986168101743286, RandomTreeDepth3AUC : 0.9148261423353723, RandomTreeDepth3ErrRate : 0.10737, RandomTreeDepth3Kappa : 0.7986168101743286, StdvNominalAttDistinctValues : 1.591644851508443, kNN1NAUC : 0.9389865952001077, kNN1NErrRate : 0.100868, kNN1NKappa : 0.8096255087337635,',\n", + " 'status': 'active',\n", + " 'uploader': 1,\n", + " 'version': 1},\n", + " {'MajorityClassSize': 100515.0,\n", + " 'MaxNominalAttDistinctValues': 10.0,\n", + " 'MinorityClassSize': 99530.0,\n", + " 'NumberOfClasses': 10.0,\n", + " 'NumberOfFeatures': 77.0,\n", + " 'NumberOfInstances': 1000000.0,\n", + " 'NumberOfInstancesWithMissingValues': 0.0,\n", + " 'NumberOfMissingValues': 0.0,\n", + " 'NumberOfNumericFeatures': 76.0,\n", + " 'NumberOfSymbolicFeatures': 1.0,\n", + " 'Unnamed: 0': 171,\n", + " 'did': 250,\n", + " 'features': '0 : [0 - att1 (numeric)], 1 : [1 - att2 (numeric)], 2 : [2 - att3 (numeric)], 3 : [3 - att4 (numeric)], 4 : [4 - att5 (numeric)], 5 : [5 - att6 (numeric)], 6 : [6 - att7 (numeric)], 7 : [7 - att8 (numeric)], 8 : [8 - att9 (numeric)], 9 : [9 - att10 (numeric)], 10 : [10 - att11 (numeric)], 11 : [11 - att12 (numeric)], 12 : [12 - att13 (numeric)], 13 : [13 - att14 (numeric)], 14 : [14 - att15 (numeric)], 15 : [15 - att16 (numeric)], 16 : [16 - att17 (numeric)], 17 : [17 - att18 (numeric)], 18 : [18 - att19 (numeric)], 19 : [19 - att20 (numeric)], 20 : [20 - att21 (numeric)], 21 : [21 - att22 (numeric)], 22 : [22 - att23 (numeric)], 23 : [23 - att24 (numeric)], 24 : [24 - att25 (numeric)], 25 : [25 - att26 (numeric)], 26 : [26 - att27 (numeric)], 27 : [27 - att28 (numeric)], 28 : [28 - att29 (numeric)], 29 : [29 - att30 (numeric)], 30 : [30 - att31 (numeric)], 31 : [31 - att32 (numeric)], 32 : [32 - att33 (numeric)], 33 : [33 - att34 (numeric)], 34 : [34 - att35 (numeric)], 35 : [35 - att36 (numeric)], 36 : [36 - att37 (numeric)], 37 : [37 - att38 (numeric)], 38 : [38 - att39 (numeric)], 39 : [39 - att40 (numeric)], 40 : [40 - att41 (numeric)], 41 : [41 - att42 (numeric)], 42 : [42 - att43 (numeric)], 43 : [43 - att44 (numeric)], 44 : [44 - att45 (numeric)], 45 : [45 - att46 (numeric)], 46 : [46 - att47 (numeric)], 47 : [47 - att48 (numeric)], 48 : [48 - att49 (numeric)], 49 : [49 - att50 (numeric)], 50 : [50 - att51 (numeric)], 51 : [51 - att52 (numeric)], 52 : [52 - att53 (numeric)], 53 : [53 - att54 (numeric)], 54 : [54 - att55 (numeric)], 55 : [55 - att56 (numeric)], 56 : [56 - att57 (numeric)], 57 : [57 - att58 (numeric)], 58 : [58 - att59 (numeric)], 59 : [59 - att60 (numeric)], 60 : [60 - att61 (numeric)], 61 : [61 - att62 (numeric)], 62 : [62 - att63 (numeric)], 63 : [63 - att64 (numeric)], 64 : [64 - att65 (numeric)], 65 : [65 - att66 (numeric)], 66 : [66 - att67 (numeric)], 67 : [67 - att68 (numeric)], 68 : [68 - att69 (numeric)], 69 : [69 - att70 (numeric)], 70 : [70 - att71 (numeric)], 71 : [71 - att72 (numeric)], 72 : [72 - att73 (numeric)], 73 : [73 - att74 (numeric)], 74 : [74 - att75 (numeric)], 75 : [75 - att76 (numeric)], 76 : [76 - class (nominal)],',\n", + " 'format': 'ARFF',\n", + " 'name': 'BNG(mfeat-fourier)',\n", + " 'qualities': 'AutoCorrelation : 0.09995609995609996, CfsSubsetEval_DecisionStumpAUC : 0.8921734291380132, CfsSubsetEval_DecisionStumpErrRate : 0.203381, CfsSubsetEval_DecisionStumpKappa : 0.7740212639581063, CfsSubsetEval_NaiveBayesAUC : 0.8921734291380132, CfsSubsetEval_NaiveBayesErrRate : 0.203381, CfsSubsetEval_NaiveBayesKappa : 0.7740212639581063, CfsSubsetEval_kNN1NAUC : 0.8921734291380132, CfsSubsetEval_kNN1NErrRate : 0.203381, CfsSubsetEval_kNN1NKappa : 0.7740212639581063, ClassEntropy : 3.3219227318547007, DecisionStumpAUC : 0.7109731867646303, DecisionStumpErrRate : 0.805155, DecisionStumpKappa : 0.10529015009179829, Dimensionality : 7.7e-05, EquivalentNumberOfAtts : nan, J48.00001.AUC : 0.8894162192689907, J48.00001.ErrRate : 0.206049, J48.00001.Kappa : 0.7710566731554293, J48.0001.AUC : 0.8894162192689907, J48.0001.ErrRate : 0.206049, J48.0001.Kappa : 0.7710566731554293, J48.001.AUC : 0.8894162192689907, J48.001.ErrRate : 0.206049, J48.001.Kappa : 0.7710566731554293, MajorityClassPercentage : 10.051499999999999, MajorityClassSize : 100515.0, MaxAttributeEntropy : nan, MaxKurtosisOfNumericAtts : 0.6624318898743744, MaxMeansOfNumericAtts : 0.377149516035, MaxMutualInformation : nan, MaxNominalAttDistinctValues : 10.0, MaxSkewnessOfNumericAtts : 0.877977217763201, MaxStdDevOfNumericAtts : 0.1761529503096837, MeanAttributeEntropy : nan, MeanKurtosisOfNumericAtts : -0.24796506036691535, MeanMeansOfNumericAtts : 0.13189745775739473, MeanMutualInformation : nan, MeanNoiseToSignalRatio : nan, MeanNominalAttDistinctValues : 10.0, MeanSkewnessOfNumericAtts : 0.4959078781308409, MeanStdDevOfNumericAtts : 0.06698397495556621, MinAttributeEntropy : nan, MinKurtosisOfNumericAtts : -1.1546436293959819, MinMeansOfNumericAtts : 0.071795535224, MinMutualInformation : nan, MinNominalAttDistinctValues : 10.0, MinSkewnessOfNumericAtts : -0.12215327407774229, MinStdDevOfNumericAtts : 0.03754417585218609, MinorityClassPercentage : 9.953, MinorityClassSize : 99530.0, NaiveBayesAUC : 0.9841125536254328, NaiveBayesErrRate : 0.167491, NaiveBayesKappa : 0.813899488564289, NumberOfBinaryFeatures : 0.0, NumberOfClasses : 10.0, NumberOfFeatures : 77.0, NumberOfInstances : 1000000.0, NumberOfInstancesWithMissingValues : 0.0, NumberOfMissingValues : 0.0, NumberOfNumericFeatures : 76.0, NumberOfSymbolicFeatures : 1.0, PercentageOfBinaryFeatures : 0.0, PercentageOfInstancesWithMissingValues : 0.0, PercentageOfMissingValues : 0.0, PercentageOfNumericFeatures : 98.7012987012987, PercentageOfSymbolicFeatures : 1.2987012987012987, Quartile1AttributeEntropy : nan, Quartile1KurtosisOfNumericAtts : -0.406046313038389, Quartile1MeansOfNumericAtts : 0.08562420371724999, Quartile1MutualInformation : nan, Quartile1SkewnessOfNumericAtts : 0.37830246153929625, Quartile1StdDevOfNumericAtts : 0.042614212594271055, Quartile2AttributeEntropy : nan, Quartile2KurtosisOfNumericAtts : -0.25087992094015243, Quartile2MeansOfNumericAtts : 0.10541226061600001, Quartile2MutualInformation : nan, Quartile2SkewnessOfNumericAtts : 0.5077806078952078, Quartile2StdDevOfNumericAtts : 0.052663183689287144, Quartile3AttributeEntropy : nan, Quartile3KurtosisOfNumericAtts : -0.057890037692025054, Quartile3MeansOfNumericAtts : 0.15621728845225, Quartile3MutualInformation : nan, Quartile3SkewnessOfNumericAtts : 0.6744091647832666, Quartile3StdDevOfNumericAtts : 0.08653997068362977, REPTreeDepth1AUC : 0.9653716870406467, REPTreeDepth1ErrRate : 0.186312, REPTreeDepth1Kappa : 0.7929864520704425, REPTreeDepth2AUC : 0.9653716870406467, REPTreeDepth2ErrRate : 0.186312, REPTreeDepth2Kappa : 0.7929864520704425, REPTreeDepth3AUC : 0.9653716870406467, REPTreeDepth3ErrRate : 0.186312, REPTreeDepth3Kappa : 0.7929864520704425, RandomTreeDepth1AUC : 0.8343781387714035, RandomTreeDepth1ErrRate : 0.298118, RandomTreeDepth1Kappa : 0.6687575197766904, RandomTreeDepth2AUC : 0.8343781387714035, RandomTreeDepth2ErrRate : 0.298118, RandomTreeDepth2Kappa : 0.6687575197766904, RandomTreeDepth3AUC : 0.8343781387714035, RandomTreeDepth3ErrRate : 0.298118, RandomTreeDepth3Kappa : 0.6687575197766904, StdvNominalAttDistinctValues : 0.0, kNN1NAUC : 0.8639133786025055, kNN1NErrRate : 0.244952, kNN1NKappa : 0.7278308053011991,',\n", + " 'status': 'active',\n", + " 'uploader': 1,\n", + " 'version': 1},\n", + " {'MajorityClassSize': 423139.0,\n", + " 'MaxNominalAttDistinctValues': 108.0,\n", + " 'MinorityClassSize': 95207.0,\n", + " 'NumberOfClasses': 6.0,\n", + " 'NumberOfFeatures': 13.0,\n", + " 'NumberOfInstances': 1000000.0,\n", + " 'NumberOfInstancesWithMissingValues': 0.0,\n", + " 'NumberOfMissingValues': 0.0,\n", + " 'NumberOfNumericFeatures': 3.0,\n", + " 'NumberOfSymbolicFeatures': 10.0,\n", + " 'Unnamed: 0': 174,\n", + " 'did': 253,\n", + " 'features': '0 : [0 - IDENTIF (nominal)], 1 : [1 - RIVER (nominal)], 2 : [2 - LOCATION (nominal)], 3 : [3 - ERECTED (numeric)], 4 : [4 - PURPOSE (nominal)], 5 : [5 - LENGTH (numeric)], 6 : [6 - LANES (numeric)], 7 : [7 - CLEAR-G (nominal)], 8 : [8 - T-OR-D (nominal)], 9 : [9 - MATERIAL (nominal)], 10 : [10 - SPAN (nominal)], 11 : [11 - REL-L (nominal)], 12 : [12 - TYPE (nominal)],',\n", + " 'format': 'ARFF',\n", + " 'name': 'BNG(bridges_version1)',\n", + " 'qualities': 'AutoCorrelation : 0.24726624726624727, CfsSubsetEval_DecisionStumpAUC : 0.9013235091316036, CfsSubsetEval_DecisionStumpErrRate : 0.239257, CfsSubsetEval_DecisionStumpKappa : 0.6774847554514949, CfsSubsetEval_NaiveBayesAUC : 0.9013235091316036, CfsSubsetEval_NaiveBayesErrRate : 0.239257, CfsSubsetEval_NaiveBayesKappa : 0.6774847554514949, CfsSubsetEval_kNN1NAUC : 0.9013235091316036, CfsSubsetEval_kNN1NErrRate : 0.239257, CfsSubsetEval_kNN1NKappa : 0.6774847554514949, ClassEntropy : 2.3111914001313076, DecisionStumpAUC : 0.6420991031898386, DecisionStumpErrRate : 0.45358, DecisionStumpKappa : 0.2734045446768764, Dimensionality : 1.3e-05, EquivalentNumberOfAtts : 11.648034501491923, J48.00001.AUC : 0.9277116524218614, J48.00001.ErrRate : 0.222551, J48.00001.Kappa : 0.7011230238646798, J48.0001.AUC : 0.9277116524218614, J48.0001.ErrRate : 0.222551, J48.0001.Kappa : 0.7011230238646798, J48.001.AUC : 0.9277116524218614, J48.001.ErrRate : 0.222551, J48.001.Kappa : 0.7011230238646798, MajorityClassPercentage : 42.3139, MajorityClassSize : 423139.0, MaxAttributeEntropy : 6.741653104159785, MaxKurtosisOfNumericAtts : 2.0172116916268146, MaxMeansOfNumericAtts : 1904.954559552158, MaxMutualInformation : 0.39516698569579, MaxNominalAttDistinctValues : 108.0, MaxSkewnessOfNumericAtts : 1.4769781921355956, MaxStdDevOfNumericAtts : 707.812837301915, MeanAttributeEntropy : 2.334660760876463, MeanKurtosisOfNumericAtts : 0.9285403013167219, MeanMeansOfNumericAtts : 1158.9873723365604, MeanMutualInformation : 0.19841900363836334, MeanNoiseToSignalRatio : 10.766316320847949, MeanNominalAttDistinctValues : 18.899999999999995, MeanSkewnessOfNumericAtts : 0.9263922280697658, MeanStdDevOfNumericAtts : 248.64962036728954, MinAttributeEntropy : 0.6275905895720326, MinKurtosisOfNumericAtts : -0.41831619940171727, MinMeansOfNumericAtts : 2.593721999999993, MinMutualInformation : 0.07202002830771, MinNominalAttDistinctValues : 2.0, MinSkewnessOfNumericAtts : -0.09442560859996699, MinStdDevOfNumericAtts : 1.211040731494273, MinorityClassPercentage : 9.5207, MinorityClassSize : 95207.0, NaiveBayesAUC : 0.9364007601799215, NaiveBayesErrRate : 0.257371, NaiveBayesKappa : 0.6500644214878881, NumberOfBinaryFeatures : 2.0, NumberOfClasses : 6.0, NumberOfFeatures : 13.0, NumberOfInstances : 1000000.0, NumberOfInstancesWithMissingValues : 0.0, NumberOfMissingValues : 0.0, NumberOfNumericFeatures : 3.0, NumberOfSymbolicFeatures : 10.0, PercentageOfBinaryFeatures : 15.384615384615385, PercentageOfInstancesWithMissingValues : 0.0, PercentageOfMissingValues : 0.0, PercentageOfNumericFeatures : 23.076923076923077, PercentageOfSymbolicFeatures : 76.92307692307693, Quartile1AttributeEntropy : 1.039817026284356, Quartile1KurtosisOfNumericAtts : -0.41831619940171727, Quartile1MeansOfNumericAtts : 2.593721999999993, Quartile1MutualInformation : 0.120056977311615, Quartile1SkewnessOfNumericAtts : -0.09442560859996699, Quartile1StdDevOfNumericAtts : 1.211040731494273, Quartile2AttributeEntropy : 1.3607191418046507, Quartile2KurtosisOfNumericAtts : 1.1867254117250687, Quartile2MeansOfNumericAtts : 1569.413835457523, Quartile2MutualInformation : 0.19956188200722, Quartile2SkewnessOfNumericAtts : 1.396624100673669, Quartile2StdDevOfNumericAtts : 36.92498306845929, Quartile3AttributeEntropy : 3.708889792926569, Quartile3KurtosisOfNumericAtts : 2.0172116916268146, Quartile3MeansOfNumericAtts : 1904.954559552158, Quartile3MutualInformation : 0.244558403316625, Quartile3SkewnessOfNumericAtts : 1.4769781921355956, Quartile3StdDevOfNumericAtts : 707.812837301915, REPTreeDepth1AUC : 0.8979153822621934, REPTreeDepth1ErrRate : 0.283543, REPTreeDepth1Kappa : 0.6147398984209902, REPTreeDepth2AUC : 0.8979153822621934, REPTreeDepth2ErrRate : 0.283543, REPTreeDepth2Kappa : 0.6147398984209902, REPTreeDepth3AUC : 0.8979153822621934, REPTreeDepth3ErrRate : 0.283543, REPTreeDepth3Kappa : 0.6147398984209902, RandomTreeDepth1AUC : 0.8287258432327871, RandomTreeDepth1ErrRate : 0.313444, RandomTreeDepth1Kappa : 0.580962925530154, RandomTreeDepth2AUC : 0.8287258432327871, RandomTreeDepth2ErrRate : 0.313444, RandomTreeDepth2Kappa : 0.580962925530154, RandomTreeDepth3AUC : 0.8287258432327871, RandomTreeDepth3ErrRate : 0.313444, RandomTreeDepth3Kappa : 0.580962925530154, StdvNominalAttDistinctValues : 35.13608464875454, kNN1NAUC : 0.803322308049043, kNN1NErrRate : 0.303501, kNN1NKappa : 0.5933954863853881,',\n", + " 'status': 'active',\n", + " 'uploader': 1,\n", + " 'version': 1},\n", + " {'MajorityClassSize': 100289.0,\n", + " 'MaxNominalAttDistinctValues': 10.0,\n", + " 'MinorityClassSize': 99797.0,\n", + " 'NumberOfClasses': 10.0,\n", + " 'NumberOfFeatures': 48.0,\n", + " 'NumberOfInstances': 1000000.0,\n", + " 'NumberOfInstancesWithMissingValues': 0.0,\n", + " 'NumberOfMissingValues': 0.0,\n", + " 'NumberOfNumericFeatures': 47.0,\n", + " 'NumberOfSymbolicFeatures': 1.0,\n", + " 'Unnamed: 0': 175,\n", + " 'did': 254,\n", + " 'features': '0 : [0 - att1 (numeric)], 1 : [1 - att2 (numeric)], 2 : [2 - att3 (numeric)], 3 : [3 - att4 (numeric)], 4 : [4 - att5 (numeric)], 5 : [5 - att6 (numeric)], 6 : [6 - att7 (numeric)], 7 : [7 - att8 (numeric)], 8 : [8 - att9 (numeric)], 9 : [9 - att10 (numeric)], 10 : [10 - att11 (numeric)], 11 : [11 - att12 (numeric)], 12 : [12 - att13 (numeric)], 13 : [13 - att14 (numeric)], 14 : [14 - att15 (numeric)], 15 : [15 - att16 (numeric)], 16 : [16 - att17 (numeric)], 17 : [17 - att18 (numeric)], 18 : [18 - att19 (numeric)], 19 : [19 - att20 (numeric)], 20 : [20 - att21 (numeric)], 21 : [21 - att22 (numeric)], 22 : [22 - att23 (numeric)], 23 : [23 - att24 (numeric)], 24 : [24 - att25 (numeric)], 25 : [25 - att26 (numeric)], 26 : [26 - att27 (numeric)], 27 : [27 - att28 (numeric)], 28 : [28 - att29 (numeric)], 29 : [29 - att30 (numeric)], 30 : [30 - att31 (numeric)], 31 : [31 - att32 (numeric)], 32 : [32 - att33 (numeric)], 33 : [33 - att34 (numeric)], 34 : [34 - att35 (numeric)], 35 : [35 - att36 (numeric)], 36 : [36 - att37 (numeric)], 37 : [37 - att38 (numeric)], 38 : [38 - att39 (numeric)], 39 : [39 - att40 (numeric)], 40 : [40 - att41 (numeric)], 41 : [41 - att42 (numeric)], 42 : [42 - att43 (numeric)], 43 : [43 - att44 (numeric)], 44 : [44 - att45 (numeric)], 45 : [45 - att46 (numeric)], 46 : [46 - att47 (numeric)], 47 : [47 - class (nominal)],',\n", + " 'format': 'ARFF',\n", + " 'name': 'BNG(mfeat-zernike)',\n", + " 'qualities': 'AutoCorrelation : 0.10008510008510009, CfsSubsetEval_DecisionStumpAUC : 0.85175133965445, CfsSubsetEval_DecisionStumpErrRate : 0.290905, CfsSubsetEval_DecisionStumpKappa : 0.6767716935011607, CfsSubsetEval_NaiveBayesAUC : 0.85175133965445, CfsSubsetEval_NaiveBayesErrRate : 0.290905, CfsSubsetEval_NaiveBayesKappa : 0.6767716935011607, CfsSubsetEval_kNN1NAUC : 0.85175133965445, CfsSubsetEval_kNN1NErrRate : 0.290905, CfsSubsetEval_kNN1NKappa : 0.6767716935011607, ClassEntropy : 3.3219261584997604, DecisionStumpAUC : 0.6754668115933892, DecisionStumpErrRate : 0.812069, DecisionStumpKappa : 0.09753866894690903, Dimensionality : 4.8e-05, EquivalentNumberOfAtts : nan, J48.00001.AUC : 0.849088585771426, J48.00001.ErrRate : 0.290867, J48.00001.Kappa : 0.676813849289966, J48.0001.AUC : 0.849088585771426, J48.0001.ErrRate : 0.290867, J48.0001.Kappa : 0.676813849289966, J48.001.AUC : 0.849088585771426, J48.001.ErrRate : 0.290867, J48.001.Kappa : 0.676813849289966, MajorityClassPercentage : 10.0289, MajorityClassSize : 100289.0, MaxAttributeEntropy : nan, MaxKurtosisOfNumericAtts : 2.233970518023533, MaxMeansOfNumericAtts : 507.70880623642, MaxMutualInformation : nan, MaxNominalAttDistinctValues : 10.0, MaxSkewnessOfNumericAtts : 1.5034817027716705, MaxStdDevOfNumericAtts : 124.1855282325309, MeanAttributeEntropy : nan, MeanKurtosisOfNumericAtts : 0.28951301210976677, MeanMeansOfNumericAtts : 88.12085155366289, MeanMutualInformation : nan, MeanNoiseToSignalRatio : nan, MeanNominalAttDistinctValues : 10.0, MeanSkewnessOfNumericAtts : 0.7479523196167397, MeanStdDevOfNumericAtts : 39.712310030701126, MinAttributeEntropy : nan, MinKurtosisOfNumericAtts : -0.9782672605059473, MinMeansOfNumericAtts : 0.078578113189, MinMutualInformation : nan, MinNominalAttDistinctValues : 10.0, MinSkewnessOfNumericAtts : 0.01647016113950717, MinStdDevOfNumericAtts : 0.06662622791953339, MinorityClassPercentage : 9.9797, MinorityClassSize : 99797.0, NaiveBayesAUC : 0.9529759652602074, NaiveBayesErrRate : 0.30974, NaiveBayesKappa : 0.6558491286808068, NumberOfBinaryFeatures : 0.0, NumberOfClasses : 10.0, NumberOfFeatures : 48.0, NumberOfInstances : 1000000.0, NumberOfInstancesWithMissingValues : 0.0, NumberOfMissingValues : 0.0, NumberOfNumericFeatures : 47.0, NumberOfSymbolicFeatures : 1.0, PercentageOfBinaryFeatures : 0.0, PercentageOfInstancesWithMissingValues : 0.0, PercentageOfMissingValues : 0.0, PercentageOfNumericFeatures : 97.91666666666666, PercentageOfSymbolicFeatures : 2.083333333333333, Quartile1AttributeEntropy : nan, Quartile1KurtosisOfNumericAtts : -0.557033653469754, Quartile1MeansOfNumericAtts : 7.3240318655120005, Quartile1MutualInformation : nan, Quartile1SkewnessOfNumericAtts : 0.37749189297088553, Quartile1StdDevOfNumericAtts : 3.520743342449164, Quartile2AttributeEntropy : nan, Quartile2KurtosisOfNumericAtts : 0.023982765584618093, Quartile2MeansOfNumericAtts : 68.78292971411099, Quartile2MutualInformation : nan, Quartile2SkewnessOfNumericAtts : 0.7367392622784438, Quartile2StdDevOfNumericAtts : 37.10292199410516, Quartile3AttributeEntropy : nan, Quartile3KurtosisOfNumericAtts : 1.1153519446718891, Quartile3MeansOfNumericAtts : 126.526860062867, Quartile3MutualInformation : nan, Quartile3SkewnessOfNumericAtts : 1.1253213168970015, Quartile3StdDevOfNumericAtts : 64.82513915389295, REPTreeDepth1AUC : 0.9369537126524267, REPTreeDepth1ErrRate : 0.271281, REPTreeDepth1Kappa : 0.6985768944698579, REPTreeDepth2AUC : 0.9369537126524267, REPTreeDepth2ErrRate : 0.271281, REPTreeDepth2Kappa : 0.6985768944698579, REPTreeDepth3AUC : 0.9369537126524267, REPTreeDepth3ErrRate : 0.271281, REPTreeDepth3Kappa : 0.6985768944698579, RandomTreeDepth1AUC : 0.8057229966595045, RandomTreeDepth1ErrRate : 0.349693, RandomTreeDepth1Kappa : 0.6114522203235186, RandomTreeDepth2AUC : 0.8057229966595045, RandomTreeDepth2ErrRate : 0.349693, RandomTreeDepth2Kappa : 0.6114522203235186, RandomTreeDepth3AUC : 0.8057229966595045, RandomTreeDepth3ErrRate : 0.349693, RandomTreeDepth3Kappa : 0.6114522203235186, StdvNominalAttDistinctValues : 0.0, kNN1NAUC : 0.8508827901265956, kNN1NErrRate : 0.268404, kNN1NKappa : 0.7017730453385455,',\n", + " 'status': 'active',\n", + " 'uploader': 1,\n", + " 'version': 1},\n", + " {'MajorityClassSize': 23567.0,\n", + " 'MaxNominalAttDistinctValues': 4.0,\n", + " 'MinorityClassSize': 12447.0,\n", + " 'NumberOfClasses': 3.0,\n", + " 'NumberOfFeatures': 10.0,\n", + " 'NumberOfInstances': 55296.0,\n", + " 'NumberOfInstancesWithMissingValues': 0.0,\n", + " 'NumberOfMissingValues': 0.0,\n", + " 'NumberOfNumericFeatures': 2.0,\n", + " 'NumberOfSymbolicFeatures': 8.0,\n", + " 'Unnamed: 0': 176,\n", + " 'did': 255,\n", + " 'features': '0 : [0 - Wifes_age (numeric)], 1 : [1 - Wifes_education (nominal)], 2 : [2 - Husbands_education (nominal)], 3 : [3 - Number_of_children_ever_born (numeric)], 4 : [4 - Wifes_religion (nominal)], 5 : [5 - Wifes_now_working%3F (nominal)], 6 : [6 - Husbands_occupation (nominal)], 7 : [7 - Standard-of-living_index (nominal)], 8 : [8 - Media_exposure (nominal)], 9 : [9 - Contraceptive_method_used (nominal)],',\n", + " 'format': 'ARFF',\n", + " 'name': 'BNG(cmc)',\n", + " 'qualities': 'AutoCorrelation : 0.35509539741387103, CfsSubsetEval_DecisionStumpAUC : 0.7079557728582438, CfsSubsetEval_DecisionStumpErrRate : 0.44288917824074076, CfsSubsetEval_DecisionStumpKappa : 0.3113651469019138, CfsSubsetEval_NaiveBayesAUC : 0.7079557728582438, CfsSubsetEval_NaiveBayesErrRate : 0.44288917824074076, CfsSubsetEval_NaiveBayesKappa : 0.3113651469019138, CfsSubsetEval_kNN1NAUC : 0.7079557728582438, CfsSubsetEval_kNN1NErrRate : 0.44288917824074076, CfsSubsetEval_kNN1NKappa : 0.3113651469019138, ClassEntropy : 1.5386700242390772, DecisionStumpAUC : 0.5778705088269857, DecisionStumpErrRate : 0.5523365162037037, DecisionStumpKappa : 0.15217830175565578, Dimensionality : 0.0001808449074074074, EquivalentNumberOfAtts : 54.028852146140544, J48.00001.AUC : 0.7009205299199321, J48.00001.ErrRate : 0.4432508680555556, J48.00001.Kappa : 0.30994558745610484, J48.0001.AUC : 0.7009205299199321, J48.0001.ErrRate : 0.4432508680555556, J48.0001.Kappa : 0.30994558745610484, J48.001.AUC : 0.7009205299199321, J48.001.ErrRate : 0.4432508680555556, J48.001.Kappa : 0.30994558745610484, MajorityClassPercentage : 42.6197193287037, MajorityClassSize : 23567.0, MaxAttributeEntropy : 1.8711440836777449, MaxKurtosisOfNumericAtts : 0.7706791387138678, MaxMeansOfNumericAtts : 32.56203415013744, MaxMutualInformation : 0.07116423354502, MaxNominalAttDistinctValues : 4.0, MaxSkewnessOfNumericAtts : 1.010292874296068, MaxStdDevOfNumericAtts : 8.227193796131264, MeanAttributeEntropy : 1.2302210371344358, MeanKurtosisOfNumericAtts : -0.09287618208151649, MeanMeansOfNumericAtts : 17.913357144033927, MeanMutualInformation : 0.02847867321106857, MeanNoiseToSignalRatio : 42.197975833238466, MeanNominalAttDistinctValues : 3.125, MeanSkewnessOfNumericAtts : 0.6349896367565343, MeanStdDevOfNumericAtts : 5.283469130973664, MinAttributeEntropy : 0.39140313050723713, MinKurtosisOfNumericAtts : -0.9564315028769008, MinMeansOfNumericAtts : 3.2646801379304105, MinMutualInformation : 0.00311962294887, MinNominalAttDistinctValues : 2.0, MinSkewnessOfNumericAtts : 0.2596863992170007, MinStdDevOfNumericAtts : 2.3397444658160653, MinorityClassPercentage : 22.509765625, MinorityClassSize : 12447.0, NaiveBayesAUC : 0.6899931165081491, NaiveBayesErrRate : 0.48835358796296297, NaiveBayesKappa : 0.25838684658393896, NumberOfBinaryFeatures : 3.0, NumberOfClasses : 3.0, NumberOfFeatures : 10.0, NumberOfInstances : 55296.0, NumberOfInstancesWithMissingValues : 0.0, NumberOfMissingValues : 0.0, NumberOfNumericFeatures : 2.0, NumberOfSymbolicFeatures : 8.0, PercentageOfBinaryFeatures : 30.0, PercentageOfInstancesWithMissingValues : 0.0, PercentageOfMissingValues : 0.0, PercentageOfNumericFeatures : 20.0, PercentageOfSymbolicFeatures : 80.0, Quartile1AttributeEntropy : 0.612350853498725, Quartile1KurtosisOfNumericAtts : -0.9564315028769008, Quartile1MeansOfNumericAtts : 3.2646801379304105, Quartile1MutualInformation : 0.0100154702513, Quartile1SkewnessOfNumericAtts : 0.2596863992170007, Quartile1StdDevOfNumericAtts : 2.3397444658160653, Quartile2AttributeEntropy : 1.4674317458734099, Quartile2KurtosisOfNumericAtts : -0.09287618208151649, Quartile2MeansOfNumericAtts : 17.913357144033924, Quartile2MutualInformation : 0.03022761438771, Quartile2SkewnessOfNumericAtts : 0.6349896367565344, Quartile2StdDevOfNumericAtts : 5.283469130973664, Quartile3AttributeEntropy : 1.7667707988363959, Quartile3KurtosisOfNumericAtts : 0.7706791387138678, Quartile3MeansOfNumericAtts : 32.56203415013744, Quartile3MutualInformation : 0.03923562284896, Quartile3SkewnessOfNumericAtts : 1.010292874296068, Quartile3StdDevOfNumericAtts : 8.227193796131264, REPTreeDepth1AUC : 0.7074435114638705, REPTreeDepth1ErrRate : 0.45361328125, REPTreeDepth1Kappa : 0.2924086118531553, REPTreeDepth2AUC : 0.7074435114638705, REPTreeDepth2ErrRate : 0.45361328125, REPTreeDepth2Kappa : 0.2924086118531553, REPTreeDepth3AUC : 0.7074435114638705, REPTreeDepth3ErrRate : 0.45361328125, REPTreeDepth3Kappa : 0.2924086118531553, RandomTreeDepth1AUC : 0.5902685503090401, RandomTreeDepth1ErrRate : 0.5304904513888888, RandomTreeDepth1Kappa : 0.1795787598849843, RandomTreeDepth2AUC : 0.5902685503090401, RandomTreeDepth2ErrRate : 0.5304904513888888, RandomTreeDepth2Kappa : 0.1795787598849843, RandomTreeDepth3AUC : 0.5902685503090401, RandomTreeDepth3ErrRate : 0.5304904513888888, RandomTreeDepth3Kappa : 0.1795787598849843, StdvNominalAttDistinctValues : 0.9910312089651149, kNN1NAUC : 0.5928637737633861, kNN1NErrRate : 0.5276150173611112, kNN1NKappa : 0.18486530131892978,',\n", + " 'status': 'active',\n", + " 'uploader': 1,\n", + " 'version': 1},\n", + " {'MajorityClassSize': 630221.0,\n", + " 'MaxNominalAttDistinctValues': 6.0,\n", + " 'MinorityClassSize': 369779.0,\n", + " 'NumberOfClasses': 2.0,\n", + " 'NumberOfFeatures': 23.0,\n", + " 'NumberOfInstances': 1000000.0,\n", + " 'NumberOfInstancesWithMissingValues': 0.0,\n", + " 'NumberOfMissingValues': 0.0,\n", + " 'NumberOfNumericFeatures': 7.0,\n", + " 'NumberOfSymbolicFeatures': 16.0,\n", + " 'Unnamed: 0': 178,\n", + " 'did': 257,\n", + " 'features': '0 : [0 - surgery (nominal)], 1 : [1 - Age (nominal)], 2 : [2 - rectal_temperature (numeric)], 3 : [3 - pulse (numeric)], 4 : [4 - respiratory_rate (numeric)], 5 : [5 - temp_extremities (nominal)], 6 : [6 - peripheral_pulse (nominal)], 7 : [7 - mucous_membranes (nominal)], 8 : [8 - capillary_refill_time (nominal)], 9 : [9 - pain (nominal)], 10 : [10 - peristalsis (nominal)], 11 : [11 - abdominal_distension (nominal)], 12 : [12 - nasogastric_tube (nominal)], 13 : [13 - nasogastric_reflux (nominal)], 14 : [14 - nasogastric_reflux_PH (numeric)], 15 : [15 - rectal_examination (nominal)], 16 : [16 - abdomen (nominal)], 17 : [17 - packed_cell_volume (numeric)], 18 : [18 - total_protein (numeric)], 19 : [19 - abdominocentesis_appearance (nominal)], 20 : [20 - abdomcentesis_total_protein (numeric)], 21 : [21 - outcome (nominal)], 22 : [22 - surgical_lesion (nominal)],',\n", + " 'format': 'ARFF',\n", + " 'name': 'BNG(colic)',\n", + " 'qualities': 'AutoCorrelation : 0.5336065336065337, CfsSubsetEval_DecisionStumpAUC : 0.9206842128679066, CfsSubsetEval_DecisionStumpErrRate : 0.12393, CfsSubsetEval_DecisionStumpKappa : 0.7321887731482503, CfsSubsetEval_NaiveBayesAUC : 0.9206842128679066, CfsSubsetEval_NaiveBayesErrRate : 0.12393, CfsSubsetEval_NaiveBayesKappa : 0.7321887731482503, CfsSubsetEval_kNN1NAUC : 0.9206842128679066, CfsSubsetEval_kNN1NErrRate : 0.12393, CfsSubsetEval_kNN1NKappa : 0.7321887731482503, ClassEntropy : 0.9505022518627755, DecisionStumpAUC : 0.811385945949615, DecisionStumpErrRate : 0.186838, DecisionStumpKappa : 0.608630118017707, Dimensionality : 2.3e-05, EquivalentNumberOfAtts : 15.685590400027284, J48.00001.AUC : 0.9108771598016026, J48.00001.ErrRate : 0.1097, J48.00001.Kappa : 0.7630655125498026, J48.0001.AUC : 0.9108771598016026, J48.0001.ErrRate : 0.1097, J48.0001.Kappa : 0.7630655125498026, J48.001.AUC : 0.9108771598016026, J48.001.ErrRate : 0.1097, J48.001.Kappa : 0.7630655125498026, MajorityClassPercentage : 63.0221, MajorityClassSize : 630221.0, MaxAttributeEntropy : 2.3152113001090875, MaxKurtosisOfNumericAtts : 9.122841875050248, MaxMeansOfNumericAtts : 73.500319272697, MaxMutualInformation : 0.28348205762599, MaxNominalAttDistinctValues : 6.0, MaxSkewnessOfNumericAtts : 2.836944443581676, MaxStdDevOfNumericAtts : 29.62277362827926, MeanAttributeEntropy : 1.4250647630212925, MeanKurtosisOfNumericAtts : 2.219748656802744, MeanMeansOfNumericAtts : 32.342938429725855, MeanMutualInformation : 0.060597161319546004, MeanNoiseToSignalRatio : 22.51702178764649, MeanNominalAttDistinctValues : 3.5625, MeanSkewnessOfNumericAtts : 0.546863770829829, MeanStdDevOfNumericAtts : 12.953669183737068, MinAttributeEntropy : 0.3972766056576088, MinKurtosisOfNumericAtts : -0.7051628596798087, MinMeansOfNumericAtts : 2.32043489956, MinMutualInformation : 0.00037890773564, MinNominalAttDistinctValues : 2.0, MinSkewnessOfNumericAtts : -2.3974153751221685, MinStdDevOfNumericAtts : 0.6536398857393563, MinorityClassPercentage : 36.977900000000005, MinorityClassSize : 369779.0, NaiveBayesAUC : 0.9170219539583531, NaiveBayesErrRate : 0.148509, NaiveBayesKappa : 0.6814474093166725, NumberOfBinaryFeatures : 3.0, NumberOfClasses : 2.0, NumberOfFeatures : 23.0, NumberOfInstances : 1000000.0, NumberOfInstancesWithMissingValues : 0.0, NumberOfMissingValues : 0.0, NumberOfNumericFeatures : 7.0, NumberOfSymbolicFeatures : 16.0, PercentageOfBinaryFeatures : 13.043478260869565, PercentageOfInstancesWithMissingValues : 0.0, PercentageOfMissingValues : 0.0, PercentageOfNumericFeatures : 30.434782608695656, PercentageOfSymbolicFeatures : 69.56521739130434, Quartile1AttributeEntropy : 1.1136241621098728, Quartile1KurtosisOfNumericAtts : -0.2058503160800167, Quartile1MeansOfNumericAtts : 6.76783748091, Quartile1MutualInformation : 0.02066693266558, Quartile1SkewnessOfNumericAtts : 0.24687711995798886, Quartile1StdDevOfNumericAtts : 1.308402984602055, Quartile2AttributeEntropy : 1.413232317341817, Quartile2KurtosisOfNumericAtts : 0.39742215707381945, Quartile2MeansOfNumericAtts : 35.145741562823, Quartile2MutualInformation : 0.0324021318614, Quartile2SkewnessOfNumericAtts : 0.8341479430006891, Quartile2StdDevOfNumericAtts : 10.372094601487213, Quartile3AttributeEntropy : 1.6585838471925973, Quartile3KurtosisOfNumericAtts : 5.635942611844879, Quartile3MeansOfNumericAtts : 45.445245627533, Quartile3MutualInformation : 0.07378886654644, Quartile3SkewnessOfNumericAtts : 0.9210740111176681, Quartile3StdDevOfNumericAtts : 27.25328161172137, REPTreeDepth1AUC : 0.938486918822373, REPTreeDepth1ErrRate : 0.111603, REPTreeDepth1Kappa : 0.7585901803733961, REPTreeDepth2AUC : 0.938486918822373, REPTreeDepth2ErrRate : 0.111603, REPTreeDepth2Kappa : 0.7585901803733961, REPTreeDepth3AUC : 0.938486918822373, REPTreeDepth3ErrRate : 0.111603, REPTreeDepth3Kappa : 0.7585901803733961, RandomTreeDepth1AUC : 0.8392734962743577, RandomTreeDepth1ErrRate : 0.153177, RandomTreeDepth1Kappa : 0.6712975370465432, RandomTreeDepth2AUC : 0.8392734962743577, RandomTreeDepth2ErrRate : 0.153177, RandomTreeDepth2Kappa : 0.6712975370465432, RandomTreeDepth3AUC : 0.8392734962743577, RandomTreeDepth3ErrRate : 0.153177, RandomTreeDepth3Kappa : 0.6712975370465432, StdvNominalAttDistinctValues : 1.1528949070347507, kNN1NAUC : 0.8431578483210511, kNN1NErrRate : 0.14619, kNN1NKappa : 0.686335639427079,',\n", + " 'status': 'active',\n", + " 'uploader': 1,\n", + " 'version': 1},\n", + " {'MajorityClassSize': 554008.0,\n", + " 'MaxNominalAttDistinctValues': 14.0,\n", + " 'MinorityClassSize': 445992.0,\n", + " 'NumberOfClasses': 2.0,\n", + " 'NumberOfFeatures': 16.0,\n", + " 'NumberOfInstances': 1000000.0,\n", + " 'NumberOfInstancesWithMissingValues': 0.0,\n", + " 'NumberOfMissingValues': 0.0,\n", + " 'NumberOfNumericFeatures': 6.0,\n", + " 'NumberOfSymbolicFeatures': 10.0,\n", + " 'Unnamed: 0': 179,\n", + " 'did': 258,\n", + " 'features': '0 : [0 - A1 (nominal)], 1 : [1 - A2 (numeric)], 2 : [2 - A3 (numeric)], 3 : [3 - A4 (nominal)], 4 : [4 - A5 (nominal)], 5 : [5 - A6 (nominal)], 6 : [6 - A7 (nominal)], 7 : [7 - A8 (numeric)], 8 : [8 - A9 (nominal)], 9 : [9 - A10 (nominal)], 10 : [10 - A11 (numeric)], 11 : [11 - A12 (nominal)], 12 : [12 - A13 (nominal)], 13 : [13 - A14 (numeric)], 14 : [14 - A15 (numeric)], 15 : [15 - class (nominal)],',\n", + " 'format': 'ARFF',\n", + " 'name': 'BNG(credit-a)',\n", + " 'qualities': 'AutoCorrelation : 0.5055695055695055, CfsSubsetEval_DecisionStumpAUC : 0.9236428160342482, CfsSubsetEval_DecisionStumpErrRate : 0.123302, CfsSubsetEval_DecisionStumpKappa : 0.7500795603564689, CfsSubsetEval_NaiveBayesAUC : 0.9236428160342482, CfsSubsetEval_NaiveBayesErrRate : 0.123302, CfsSubsetEval_NaiveBayesKappa : 0.7500795603564689, CfsSubsetEval_kNN1NAUC : 0.9236428160342482, CfsSubsetEval_kNN1NErrRate : 0.123302, CfsSubsetEval_kNN1NKappa : 0.7500795603564689, ClassEntropy : 0.9915672663814072, DecisionStumpAUC : 0.8541206799506694, DecisionStumpErrRate : 0.151383, DecisionStumpKappa : 0.6982638524973298, Dimensionality : 1.6e-05, EquivalentNumberOfAtts : 11.804035448329977, J48.00001.AUC : 0.9264494052977083, J48.00001.ErrRate : 0.109414, J48.00001.Kappa : 0.778334125212909, J48.0001.AUC : 0.9264494052977083, J48.0001.ErrRate : 0.109414, J48.0001.Kappa : 0.778334125212909, J48.001.AUC : 0.9264494052977083, J48.001.ErrRate : 0.109414, J48.001.Kappa : 0.778334125212909, MajorityClassPercentage : 55.4008, MajorityClassSize : 554008.0, MaxAttributeEntropy : 3.5228699067646363, MaxKurtosisOfNumericAtts : 16.195299364951175, MaxMeansOfNumericAtts : 1041.64972134852, MaxMutualInformation : 0.40292015145978, MaxNominalAttDistinctValues : 14.0, MaxSkewnessOfNumericAtts : 2.8686822783720123, MaxStdDevOfNumericAtts : 5269.332591656998, MeanAttributeEntropy : 1.3083589383538596, MeanKurtosisOfNumericAtts : 5.547874049739754, MeanMeansOfNumericAtts : 210.67665656673768, MeanMutualInformation : 0.08400239652971334, MeanNoiseToSignalRatio : 14.575257283178424, MeanNominalAttDistinctValues : 4.3, MeanSkewnessOfNumericAtts : 1.9186529372632175, MeanStdDevOfNumericAtts : 911.1448879076281, MinAttributeEntropy : 0.5558554785178321, MinKurtosisOfNumericAtts : 0.5691563826713018, MinMeansOfNumericAtts : 2.197265006407, MinMutualInformation : 0.00053302906036, MinNominalAttDistinctValues : 2.0, MinSkewnessOfNumericAtts : 1.0770283528918647, MinStdDevOfNumericAtts : 3.3431881209706527, MinorityClassPercentage : 44.5992, MinorityClassSize : 445992.0, NaiveBayesAUC : 0.9079508400354774, NaiveBayesErrRate : 0.16913, NaiveBayesKappa : 0.6525505293390783, NumberOfBinaryFeatures : 5.0, NumberOfClasses : 2.0, NumberOfFeatures : 16.0, NumberOfInstances : 1000000.0, NumberOfInstancesWithMissingValues : 0.0, NumberOfMissingValues : 0.0, NumberOfNumericFeatures : 6.0, NumberOfSymbolicFeatures : 10.0, PercentageOfBinaryFeatures : 31.25, PercentageOfInstancesWithMissingValues : 0.0, PercentageOfMissingValues : 0.0, PercentageOfNumericFeatures : 37.5, PercentageOfSymbolicFeatures : 62.5, Quartile1AttributeEntropy : 0.8411127085612495, Quartile1KurtosisOfNumericAtts : 0.8640729637588276, Quartile1MeansOfNumericAtts : 2.3588195008915056, Quartile1MutualInformation : 0.0050729212277349995, Quartile1SkewnessOfNumericAtts : 1.267997504852106, Quartile1StdDevOfNumericAtts : 4.499114428848817, Quartile2AttributeEntropy : 0.9835837346010516, Quartile2KurtosisOfNumericAtts : 3.2235730941015897, Quartile2MeansOfNumericAtts : 18.040640760645502, Quartile2MutualInformation : 0.02799897137278, Quartile2SkewnessOfNumericAtts : 1.7803984198040648, Quartile2StdDevOfNumericAtts : 8.460795390952773, Quartile3AttributeEntropy : 1.5751281782620663, Quartile3KurtosisOfNumericAtts : 10.883772745106443, Quartile3MeansOfNumericAtts : 396.70168073099626, Quartile3MutualInformation : 0.1241908979419, Quartile3SkewnessOfNumericAtts : 2.722487767494337, Quartile3StdDevOfNumericAtts : 1446.623798180064, REPTreeDepth1AUC : 0.9456538530396582, REPTreeDepth1ErrRate : 0.11094, REPTreeDepth1Kappa : 0.7753240635982098, REPTreeDepth2AUC : 0.9456538530396582, REPTreeDepth2ErrRate : 0.11094, REPTreeDepth2Kappa : 0.7753240635982098, REPTreeDepth3AUC : 0.9456538530396582, REPTreeDepth3ErrRate : 0.11094, REPTreeDepth3Kappa : 0.7753240635982098, RandomTreeDepth1AUC : 0.8511524014025537, RandomTreeDepth1ErrRate : 0.147639, RandomTreeDepth1Kappa : 0.7011429677010672, RandomTreeDepth2AUC : 0.8511524014025537, RandomTreeDepth2ErrRate : 0.147639, RandomTreeDepth2Kappa : 0.7011429677010672, RandomTreeDepth3AUC : 0.8511524014025537, RandomTreeDepth3ErrRate : 0.147639, RandomTreeDepth3Kappa : 0.7011429677010672, StdvNominalAttDistinctValues : 4.029061098237817, kNN1NAUC : 0.8411233385747212, kNN1NErrRate : 0.155599, kNN1NKappa : 0.6842183003873852,',\n", + " 'status': 'active',\n", + " 'uploader': 1,\n", + " 'version': 1},\n", + " {'MajorityClassSize': 44.0,\n", + " 'MaxNominalAttDistinctValues': 54.0,\n", + " 'MinorityClassSize': 10.0,\n", + " 'NumberOfClasses': 6.0,\n", + " 'NumberOfFeatures': 12.0,\n", + " 'NumberOfInstances': 105.0,\n", + " 'NumberOfInstancesWithMissingValues': 35.0,\n", + " 'NumberOfMissingValues': 61.0,\n", + " 'NumberOfNumericFeatures': 0.0,\n", + " 'NumberOfSymbolicFeatures': 12.0,\n", + " 'Unnamed: 0': 218,\n", + " 'description': '**Author**: Yoram Reich\",\"Steven J. Fenves \\n \\n**Source**: [original](http://openml.org/d/19) - \\n**Please cite**: \\n\\nPittsburgh bridges \\n\\nThis version is derived from version 2 (the discretized version) by removing all instances with missing values in the last (target) attribute. The bridges dataset is originally not a classification dataset, put is used so extensively in the literature, using the last attribute as the target attribute. However, this attribute has missing values, which may lead to confusing benchmarking result. Therefore, these instances have been removed. \\n\\nSources: \\n-- Yoram Reich and Steven J. Fenves Department of Civil Engineering and Engineering Design Research Center Carnegie Mellon University Pittsburgh, PA 15213 Compiled from various sources. \\n-- Date: 1 August 1990 \\n\\nAttribute Information: The type field state whether a property is continuous/integer (c) or nominal (n). For properties with c,n type, the range of continuous numbers is given first and the possible values of the nominal follow the semi-colon. \\n\\nname type possible values comments \\n------------------------------------------------------------------------ \\n1. IDENTIF - - identifier of the examples \\n2. RIVER n A, M, O \\n3. LOCATION n 1 to 52 \\n4. ERECTED c,n 1818-1986 - CRAFTS, EMERGING, MATURE, MODERN \\n5. PURPOSE n WALK, AQUEDUCT, RR, HIGHWAY \\n6. LENGTH c,n 804-4558 - SHORT, MEDIUM, LONG \\n7. LANES c,n 1, 2, 4, 6 - 1, 2, 4, 6 \\n8. CLEAR-G n N, G \\n9. T-OR-D n THROUGH, DECK \\n10. MATERIAL n WOOD, IRON, STEEL \\n11. SPAN n SHORT, MEDIUM, LONG \\n12. REL-L n S, S-F, F \\n13. TYPE n WOOD, SUSPEN, SIMPLE-T, ARCH, CANTILEV, CONT-T',\n", + " 'did': 328,\n", + " 'features': '0 : [0 - IDENTIF (nominal)], 1 : [1 - RIVER (nominal)], 2 : [2 - LOCATION (nominal)], 3 : [3 - ERECTED (nominal)], 4 : [4 - PURPOSE (nominal)], 5 : [5 - LENGTH (nominal)], 6 : [6 - LANES (nominal)], 7 : [7 - CLEAR-G (nominal)], 8 : [8 - T-OR-D (nominal)], 9 : [9 - MATERIAL (nominal)], 10 : [10 - SPAN (nominal)], 11 : [11 - REL-L (nominal)], 12 : [12 - TYPE (nominal)],',\n", + " 'format': 'ARFF',\n", + " 'name': 'bridges',\n", + " 'qualities': 'AutoCorrelation : 0.49038461538461536, CfsSubsetEval_DecisionStumpAUC : 0.6873916110699549, CfsSubsetEval_DecisionStumpErrRate : 0.4380952380952381, CfsSubsetEval_DecisionStumpKappa : 0.2985768225384839, CfsSubsetEval_NaiveBayesAUC : 0.6873916110699549, CfsSubsetEval_NaiveBayesErrRate : 0.4380952380952381, CfsSubsetEval_NaiveBayesKappa : 0.2985768225384839, CfsSubsetEval_kNN1NAUC : 0.6873916110699549, CfsSubsetEval_kNN1NErrRate : 0.4380952380952381, CfsSubsetEval_kNN1NKappa : 0.2985768225384839, ClassEntropy : 2.317602811811176, DecisionStumpAUC : 0.6593219297314425, DecisionStumpErrRate : 0.42857142857142855, DecisionStumpKappa : 0.3105209397344228, Dimensionality : 0.11428571428571428, EquivalentNumberOfAtts : 4.8158526820324905, J48.00001.AUC : 0.79171068395272, J48.00001.ErrRate : 0.3904761904761905, J48.00001.Kappa : 0.43607545192559616, J48.0001.AUC : 0.79171068395272, J48.0001.ErrRate : 0.3904761904761905, J48.0001.Kappa : 0.43607545192559616, J48.001.AUC : 0.79171068395272, J48.001.ErrRate : 0.3904761904761905, J48.001.Kappa : 0.43607545192559616, MajorityClassPercentage : 41.904761904761905, MajorityClassSize : 44.0, MaxAttributeEntropy : 5.578205398474268, MaxKurtosisOfNumericAtts : nan, MaxMeansOfNumericAtts : nan, MaxMutualInformation : 1.55784626408911, MaxNominalAttDistinctValues : 54.0, MaxSkewnessOfNumericAtts : nan, MaxStdDevOfNumericAtts : nan, MeanAttributeEntropy : 1.6872699252760597, MeanKurtosisOfNumericAtts : nan, MeanMeansOfNumericAtts : nan, MeanMutualInformation : 0.48124454065174, MeanNoiseToSignalRatio : 2.506055202186862, MeanNominalAttDistinctValues : 7.666666666666667, MeanSkewnessOfNumericAtts : nan, MeanStdDevOfNumericAtts : nan, MinAttributeEntropy : 0.5916727785823275, MinKurtosisOfNumericAtts : nan, MinMeansOfNumericAtts : nan, MinMutualInformation : 0.16891819380606, MinNominalAttDistinctValues : 2.0, MinSkewnessOfNumericAtts : nan, MinStdDevOfNumericAtts : nan, MinorityClassPercentage : 9.523809523809524, MinorityClassSize : 10.0, NaiveBayesAUC : 0.8578793089705496, NaiveBayesErrRate : 0.3238095238095238, NaiveBayesKappa : 0.5645810464690816, NumberOfBinaryFeatures : 2.0, NumberOfClasses : 6.0, NumberOfFeatures : 12.0, NumberOfInstances : 105.0, NumberOfInstancesWithMissingValues : 35.0, NumberOfMissingValues : 61.0, NumberOfNumericFeatures : 0.0, NumberOfSymbolicFeatures : 12.0, PercentageOfBinaryFeatures : 16.666666666666664, PercentageOfInstancesWithMissingValues : 33.33333333333333, PercentageOfMissingValues : 4.841269841269842, PercentageOfNumericFeatures : 0.0, PercentageOfSymbolicFeatures : 100.0, Quartile1AttributeEntropy : 1.0731491283683547, Quartile1KurtosisOfNumericAtts : nan, Quartile1MeansOfNumericAtts : nan, Quartile1MutualInformation : 0.23751139345904, Quartile1SkewnessOfNumericAtts : nan, Quartile1StdDevOfNumericAtts : nan, Quartile2AttributeEntropy : 1.4866836360458784, Quartile2KurtosisOfNumericAtts : nan, Quartile2MeansOfNumericAtts : nan, Quartile2MutualInformation : 0.36240410933397, Quartile2SkewnessOfNumericAtts : nan, Quartile2StdDevOfNumericAtts : nan, Quartile3AttributeEntropy : 1.5829772831026911, Quartile3KurtosisOfNumericAtts : nan, Quartile3MeansOfNumericAtts : nan, Quartile3MutualInformation : 0.60363739984227, Quartile3SkewnessOfNumericAtts : nan, Quartile3StdDevOfNumericAtts : nan, REPTreeDepth1AUC : 0.5279250475484106, REPTreeDepth1ErrRate : 0.6190476190476191, REPTreeDepth1Kappa : 0.02262637834741511, REPTreeDepth2AUC : 0.5279250475484106, REPTreeDepth2ErrRate : 0.6190476190476191, REPTreeDepth2Kappa : 0.02262637834741511, REPTreeDepth3AUC : 0.5279250475484106, REPTreeDepth3ErrRate : 0.6190476190476191, REPTreeDepth3Kappa : 0.02262637834741511, RandomTreeDepth1AUC : 0.7698173171361247, RandomTreeDepth1ErrRate : 0.42857142857142855, RandomTreeDepth1Kappa : 0.37582562747688236, RandomTreeDepth2AUC : 0.7698173171361247, RandomTreeDepth2ErrRate : 0.42857142857142855, RandomTreeDepth2Kappa : 0.37582562747688236, RandomTreeDepth3AUC : 0.7698173171361247, RandomTreeDepth3ErrRate : 0.42857142857142855, RandomTreeDepth3Kappa : 0.37582562747688236, StdvNominalAttDistinctValues : 14.63081016419139, kNN1NAUC : 0.7781376695997415, kNN1NErrRate : 0.3904761904761905, kNN1NKappa : 0.47881355932203395,',\n", + " 'status': 'active',\n", + " 'uploader': 2,\n", + " 'version': 4},\n", + " {'MaxNominalAttDistinctValues': 2.0,\n", + " 'NumberOfClasses': 0.0,\n", + " 'NumberOfFeatures': 10.0,\n", + " 'NumberOfInstances': 159.0,\n", + " 'NumberOfInstancesWithMissingValues': 6.0,\n", + " 'NumberOfMissingValues': 6.0,\n", + " 'NumberOfNumericFeatures': 5.0,\n", + " 'NumberOfSymbolicFeatures': 5.0,\n", + " 'Unnamed: 0': 363,\n", + " 'description': '**Author**: \\n**Source**: Unknown - Date unknown \\n**Please cite**: \\n\\nanalcatdata A collection of data sets used in the book \"Analyzing Categorical Data,\"\\nby Jeffrey S. Simonoff, Springer-Verlag, New York, 2003. The submission\\nconsists of a zip file containing two versions of each of 84 data sets,\\nplus this README file. Each data set is given in comma-delimited ASCII\\n(.csv) form, and Microsoft Excel (.xls) form.\\n\\nNOTICE: These data sets may be used freely for scientific, educational and/or\\nnoncommercial purposes, provided suitable acknowledgment is given (by citing\\nthe above-named reference).\\n\\nFurther details concerning the book, including information on statistical software\\n(including sample S-PLUS/R and SAS code), are available at the web site\\n\\nhttp://www.stern.nyu.edu/~jsimonof/AnalCatData\\n\\n\\nInformation about the dataset\\nCLASSTYPE: numeric\\nCLASSINDEX: last\\n\\n\\nNote: Quotes, Single-Quotes and Backslashes were removed, Blanks replaced\\nwith Underscores',\n", + " 'did': 506,\n", + " 'features': '0 : [0 - Married (nominal)], 1 : [1 - Age (numeric)], 2 : [2 - Years_of_education (numeric)], 3 : [3 - Male (nominal)], 4 : [4 - Religious (nominal)], 5 : [5 - Sex_partners (numeric)], 6 : [6 - Income (numeric)], 7 : [7 - Drug_use (nominal)], 8 : [8 - Same_sex_relations (nominal)], 9 : [9 - AIDS_know (numeric)],',\n", + " 'format': 'ARFF',\n", + " 'name': 'analcatdata_gsssexsurvey',\n", + " 'qualities': 'AutoCorrelation : 0.47468354430379744, CfsSubsetEval_DecisionStumpAUC : nan, CfsSubsetEval_DecisionStumpErrRate : nan, CfsSubsetEval_DecisionStumpKappa : nan, CfsSubsetEval_NaiveBayesAUC : nan, CfsSubsetEval_NaiveBayesErrRate : nan, CfsSubsetEval_NaiveBayesKappa : nan, CfsSubsetEval_kNN1NAUC : nan, CfsSubsetEval_kNN1NErrRate : nan, CfsSubsetEval_kNN1NKappa : nan, ClassEntropy : nan, DecisionStumpAUC : nan, DecisionStumpErrRate : nan, DecisionStumpKappa : nan, Dimensionality : 0.06289308176100629, EquivalentNumberOfAtts : nan, J48.00001.AUC : nan, J48.00001.ErrRate : nan, J48.00001.Kappa : nan, J48.0001.AUC : nan, J48.0001.ErrRate : nan, J48.0001.Kappa : nan, J48.001.AUC : nan, J48.001.ErrRate : nan, J48.001.Kappa : nan, MajorityClassPercentage : nan, MajorityClassSize : nan, MaxAttributeEntropy : nan, MaxKurtosisOfNumericAtts : 8.886923444813059, MaxMeansOfNumericAtts : 24555.55555555555, MaxMutualInformation : nan, MaxNominalAttDistinctValues : 2.0, MaxSkewnessOfNumericAtts : 2.9101190601750293, MaxStdDevOfNumericAtts : 17224.50495388568, MeanAttributeEntropy : nan, MeanKurtosisOfNumericAtts : 3.9091211561999764, MeanMeansOfNumericAtts : 4922.47966457023, MeanMutualInformation : nan, MeanNoiseToSignalRatio : nan, MeanNominalAttDistinctValues : 2.0, MeanSkewnessOfNumericAtts : 1.5446898401998712, MeanStdDevOfNumericAtts : 3448.5649830178054, MinAttributeEntropy : nan, MinKurtosisOfNumericAtts : 0.5279947743546121, MinMeansOfNumericAtts : 0.3396226415094337, MinMutualInformation : nan, MinNominalAttDistinctValues : 2.0, MinSkewnessOfNumericAtts : 0.10326629350803894, MinStdDevOfNumericAtts : 0.7533065397135874, MinorityClassPercentage : nan, MinorityClassSize : nan, NaiveBayesAUC : nan, NaiveBayesErrRate : nan, NaiveBayesKappa : nan, NumberOfBinaryFeatures : 5.0, NumberOfClasses : 0.0, NumberOfFeatures : 10.0, NumberOfInstances : 159.0, NumberOfInstancesWithMissingValues : 6.0, NumberOfMissingValues : 6.0, NumberOfNumericFeatures : 5.0, NumberOfSymbolicFeatures : 5.0, PercentageOfBinaryFeatures : 50.0, PercentageOfInstancesWithMissingValues : 3.7735849056603774, PercentageOfMissingValues : 0.37735849056603776, PercentageOfNumericFeatures : 50.0, PercentageOfSymbolicFeatures : 50.0, Quartile1AttributeEntropy : nan, Quartile1KurtosisOfNumericAtts : 0.5376383551039363, Quartile1MeansOfNumericAtts : 1.5251572327044023, Quartile1MutualInformation : nan, Quartile1SkewnessOfNumericAtts : 0.4995955249738286, Quartile1StdDevOfNumericAtts : 1.7327600338286375, Quartile2AttributeEntropy : nan, Quartile2KurtosisOfNumericAtts : 1.7507886384047597, Quartile2MeansOfNumericAtts : 13.716981132075471, Quartile2MutualInformation : nan, Quartile2SkewnessOfNumericAtts : 1.1388718940504805, Quartile2StdDevOfNumericAtts : 3.7893019161873753, Quartile3AttributeEntropy : nan, Quartile3KurtosisOfNumericAtts : 8.359770216193624, Quartile3MeansOfNumericAtts : 12297.815513626832, Quartile3MutualInformation : nan, Quartile3SkewnessOfNumericAtts : 2.792693128500609, Quartile3StdDevOfNumericAtts : 8617.785046552592, REPTreeDepth1AUC : nan, REPTreeDepth1ErrRate : nan, REPTreeDepth1Kappa : nan, REPTreeDepth2AUC : nan, REPTreeDepth2ErrRate : nan, REPTreeDepth2Kappa : nan, REPTreeDepth3AUC : nan, REPTreeDepth3ErrRate : nan, REPTreeDepth3Kappa : nan, RandomTreeDepth1AUC : nan, RandomTreeDepth1ErrRate : nan, RandomTreeDepth1Kappa : nan, RandomTreeDepth2AUC : nan, RandomTreeDepth2ErrRate : nan, RandomTreeDepth2Kappa : nan, RandomTreeDepth3AUC : nan, RandomTreeDepth3ErrRate : nan, RandomTreeDepth3Kappa : nan, StdvNominalAttDistinctValues : 0.0, kNN1NAUC : nan, kNN1NErrRate : nan, kNN1NKappa : nan,',\n", + " 'status': 'active',\n", + " 'uploader': 2,\n", + " 'version': 1},\n", + " {'NumberOfClasses': 0.0,\n", + " 'NumberOfFeatures': 11.0,\n", + " 'NumberOfInstances': 1000.0,\n", + " 'NumberOfInstancesWithMissingValues': 0.0,\n", + " 'NumberOfMissingValues': 0.0,\n", + " 'NumberOfNumericFeatures': 11.0,\n", + " 'NumberOfSymbolicFeatures': 0.0,\n", + " 'Unnamed: 0': 476,\n", + " 'description': '**Author**: \\n**Source**: Unknown - Date unknown \\n**Please cite**: \\n\\nThe Friedman datasets are 80 artificially generated datasets originating from: J.H. Friedman (1999). Stochastic Gradient Boosting\\n\\nThe dataset names are coded as \"fri_colinearintydegree_samplenumber_featurenumber\".\\n\\nFriedman is the one of the most used functions for data generation (Friedman, 1999). Friedman functions include both linear and non-linear relations between input and output, and a normalized noise (e) is added to the output. The Friedman function is as follows: \\n\\ny=10*sin(pi*x1*x2)+20*(x3-0.5)^2=10*X4+5*X5+e\\n\\nIn the original Friedman function, there are 5 features for input. To measure the effects of non-related features, additional features are added to the datasets. These added features are independent from the output. However, to measure the algorithm\\'s robustness to the colinearity, the datasets are generated with 5 different colinearity degrees. The colinearity degrees is the number of features depending on other features. \\n\\nThe generated Friedman dataset\\'s parameters and values are given below: \\nThe number of features: 5 10 25 50 100 (only the first 5 features are related to the output. The rest are completely random)\\nThe number of samples: 100 250 500 1000\\nColinearity degrees: 0 1 2 3 4\\nFor the datasets with colinearity degree 4, the numbers of features are generated as 10, 25, 50 and 100.\\nThe other datasets have 5, 10, 25 and 50 features.\\n\\nAs a result, 80 artificial datasets are generated by (4 different feature number * 4 different sample number * 5 different colinearity degree)\\n\\nThe last attribute in each file is the target.',\n", + " 'did': 623,\n", + " 'features': '0 : [0 - oz1 (numeric)], 1 : [1 - oz2 (numeric)], 2 : [2 - oz3 (numeric)], 3 : [3 - oz4 (numeric)], 4 : [4 - oz5 (numeric)], 5 : [5 - oz6 (numeric)], 6 : [6 - oz7 (numeric)], 7 : [7 - oz8 (numeric)], 8 : [8 - oz9 (numeric)], 9 : [9 - oz10 (numeric)], 10 : [10 - oz11 (numeric)],',\n", + " 'format': 'ARFF',\n", + " 'name': 'fri_c4_1000_10',\n", + " 'qualities': 'AutoCorrelation : -0.1431321730294305, CfsSubsetEval_DecisionStumpAUC : nan, CfsSubsetEval_DecisionStumpErrRate : nan, CfsSubsetEval_DecisionStumpKappa : nan, CfsSubsetEval_NaiveBayesAUC : nan, CfsSubsetEval_NaiveBayesErrRate : nan, CfsSubsetEval_NaiveBayesKappa : nan, CfsSubsetEval_kNN1NAUC : nan, CfsSubsetEval_kNN1NErrRate : nan, CfsSubsetEval_kNN1NKappa : nan, ClassEntropy : nan, DecisionStumpAUC : nan, DecisionStumpErrRate : nan, DecisionStumpKappa : nan, Dimensionality : 0.011, EquivalentNumberOfAtts : nan, J48.00001.AUC : nan, J48.00001.ErrRate : nan, J48.00001.Kappa : nan, J48.0001.AUC : nan, J48.0001.ErrRate : nan, J48.0001.Kappa : nan, J48.001.AUC : nan, J48.001.ErrRate : nan, J48.001.Kappa : nan, MajorityClassPercentage : nan, MajorityClassSize : nan, MaxAttributeEntropy : nan, MaxKurtosisOfNumericAtts : 1.9053017190944588, MaxMeansOfNumericAtts : 1.1514999891104605e-09, MaxMutualInformation : nan, MaxNominalAttDistinctValues : nan, MaxSkewnessOfNumericAtts : 1.4130003640032112, MaxStdDevOfNumericAtts : 1.0000000006680836, MeanAttributeEntropy : nan, MeanKurtosisOfNumericAtts : -0.5092063183629136, MeanMeansOfNumericAtts : 5.750025164341646e-11, MeanMutualInformation : nan, MeanNoiseToSignalRatio : nan, MeanNominalAttDistinctValues : nan, MeanSkewnessOfNumericAtts : 0.26265382761299244, MeanStdDevOfNumericAtts : 0.9999999994973643, MinAttributeEntropy : nan, MinKurtosisOfNumericAtts : -1.252232783383241, MinMeansOfNumericAtts : -8.97900024332543e-10, MinMutualInformation : nan, MinNominalAttDistinctValues : nan, MinSkewnessOfNumericAtts : -0.17282355890184273, MinStdDevOfNumericAtts : 0.9999999983660289, MinorityClassPercentage : nan, MinorityClassSize : nan, NaiveBayesAUC : nan, NaiveBayesErrRate : nan, NaiveBayesKappa : nan, NumberOfBinaryFeatures : 0.0, NumberOfClasses : 0.0, NumberOfFeatures : 11.0, NumberOfInstances : 1000.0, NumberOfInstancesWithMissingValues : 0.0, NumberOfMissingValues : 0.0, NumberOfNumericFeatures : 11.0, NumberOfSymbolicFeatures : 0.0, PercentageOfBinaryFeatures : 0.0, PercentageOfInstancesWithMissingValues : 0.0, PercentageOfMissingValues : 0.0, PercentageOfNumericFeatures : 100.0, PercentageOfSymbolicFeatures : 0.0, Quartile1AttributeEntropy : nan, Quartile1KurtosisOfNumericAtts : -1.2467638014696125, Quartile1MeansOfNumericAtts : -3.7560000887282287e-10, Quartile1MutualInformation : nan, Quartile1SkewnessOfNumericAtts : -0.01865601158096305, Quartile1StdDevOfNumericAtts : 0.9999999988117013, Quartile2AttributeEntropy : nan, Quartile2KurtosisOfNumericAtts : -1.2102392844900602, Quartile2MeansOfNumericAtts : 6.673004537827155e-11, Quartile2MutualInformation : nan, Quartile2SkewnessOfNumericAtts : 0.024246496361142147, Quartile2StdDevOfNumericAtts : 0.9999999994149671, Quartile3AttributeEntropy : nan, Quartile3KurtosisOfNumericAtts : 0.17473277546107502, Quartile3MeansOfNumericAtts : 3.340999302769987e-10, Quartile3MutualInformation : nan, Quartile3SkewnessOfNumericAtts : 0.7497638661886735, Quartile3StdDevOfNumericAtts : 1.0000000002006977, REPTreeDepth1AUC : nan, REPTreeDepth1ErrRate : nan, REPTreeDepth1Kappa : nan, REPTreeDepth2AUC : nan, REPTreeDepth2ErrRate : nan, REPTreeDepth2Kappa : nan, REPTreeDepth3AUC : nan, REPTreeDepth3ErrRate : nan, REPTreeDepth3Kappa : nan, RandomTreeDepth1AUC : nan, RandomTreeDepth1ErrRate : nan, RandomTreeDepth1Kappa : nan, RandomTreeDepth2AUC : nan, RandomTreeDepth2ErrRate : nan, RandomTreeDepth2Kappa : nan, RandomTreeDepth3AUC : nan, RandomTreeDepth3ErrRate : nan, RandomTreeDepth3Kappa : nan, StdvNominalAttDistinctValues : nan, kNN1NAUC : nan, kNN1NErrRate : nan, kNN1NKappa : nan,',\n", + " 'status': 'active',\n", + " 'uploader': 2,\n", + " 'version': 1},\n", + " {'NumberOfClasses': 0.0,\n", + " 'NumberOfFeatures': 6.0,\n", + " 'NumberOfInstances': 100.0,\n", + " 'NumberOfInstancesWithMissingValues': 0.0,\n", + " 'NumberOfMissingValues': 0.0,\n", + " 'NumberOfNumericFeatures': 6.0,\n", + " 'NumberOfSymbolicFeatures': 0.0,\n", + " 'Unnamed: 0': 477,\n", + " 'description': '**Author**: \\n**Source**: Unknown - Date unknown \\n**Please cite**: \\n\\nThe Friedman datasets are 80 artificially generated datasets originating from: J.H. Friedman (1999). Stochastic Gradient Boosting\\n\\nThe dataset names are coded as \"fri_colinearintydegree_samplenumber_featurenumber\".\\n\\nFriedman is the one of the most used functions for data generation (Friedman, 1999). Friedman functions include both linear and non-linear relations between input and output, and a normalized noise (e) is added to the output. The Friedman function is as follows: \\n\\ny=10*sin(pi*x1*x2)+20*(x3-0.5)^2=10*X4+5*X5+e\\n\\nIn the original Friedman function, there are 5 features for input. To measure the effects of non-related features, additional features are added to the datasets. These added features are independent from the output. However, to measure the algorithm\\'s robustness to the colinearity, the datasets are generated with 5 different colinearity degrees. The colinearity degrees is the number of features depending on other features. \\n\\nThe generated Friedman dataset\\'s parameters and values are given below: \\nThe number of features: 5 10 25 50 100 (only the first 5 features are related to the output. The rest are completely random)\\nThe number of samples: 100 250 500 1000\\nColinearity degrees: 0 1 2 3 4\\nFor the datasets with colinearity degree 4, the numbers of features are generated as 10, 25, 50 and 100.\\nThe other datasets have 5, 10, 25 and 50 features.\\n\\nAs a result, 80 artificial datasets are generated by (4 different feature number * 4 different sample number * 5 different colinearity degree)\\n\\nThe last attribute in each file is the target.',\n", + " 'did': 624,\n", + " 'features': '0 : [0 - oz1 (numeric)], 1 : [1 - oz2 (numeric)], 2 : [2 - oz3 (numeric)], 3 : [3 - oz4 (numeric)], 4 : [4 - oz5 (numeric)], 5 : [5 - oz6 (numeric)],',\n", + " 'format': 'ARFF',\n", + " 'name': 'fri_c0_100_5',\n", + " 'qualities': 'AutoCorrelation : -0.007891897050505388, CfsSubsetEval_DecisionStumpAUC : nan, CfsSubsetEval_DecisionStumpErrRate : nan, CfsSubsetEval_DecisionStumpKappa : nan, CfsSubsetEval_NaiveBayesAUC : nan, CfsSubsetEval_NaiveBayesErrRate : nan, CfsSubsetEval_NaiveBayesKappa : nan, CfsSubsetEval_kNN1NAUC : nan, CfsSubsetEval_kNN1NErrRate : nan, CfsSubsetEval_kNN1NKappa : nan, ClassEntropy : nan, DecisionStumpAUC : nan, DecisionStumpErrRate : nan, DecisionStumpKappa : nan, Dimensionality : 0.06, EquivalentNumberOfAtts : nan, J48.00001.AUC : nan, J48.00001.ErrRate : nan, J48.00001.Kappa : nan, J48.0001.AUC : nan, J48.0001.ErrRate : nan, J48.0001.Kappa : nan, J48.001.AUC : nan, J48.001.ErrRate : nan, J48.001.Kappa : nan, MajorityClassPercentage : nan, MajorityClassSize : nan, MaxAttributeEntropy : nan, MaxKurtosisOfNumericAtts : -0.5528786452668188, MaxMeansOfNumericAtts : 1.2699999930865235e-09, MaxMutualInformation : nan, MaxNominalAttDistinctValues : nan, MaxSkewnessOfNumericAtts : 0.354942254726109, MaxStdDevOfNumericAtts : 1.0000000038649643, MeanAttributeEntropy : nan, MeanKurtosisOfNumericAtts : -0.9807632643793867, MeanMeansOfNumericAtts : -7.253333315221377e-10, MeanMutualInformation : nan, MeanNoiseToSignalRatio : nan, MeanNominalAttDistinctValues : nan, MeanSkewnessOfNumericAtts : 0.0024185977273379184, MeanStdDevOfNumericAtts : 1.0000000006047896, MinAttributeEntropy : nan, MinKurtosisOfNumericAtts : -1.3282931476128914, MinMeansOfNumericAtts : -3.340000052087788e-09, MinMutualInformation : nan, MinNominalAttDistinctValues : nan, MinSkewnessOfNumericAtts : -0.133307630216264, MinStdDevOfNumericAtts : 0.9999999966147466, MinorityClassPercentage : nan, MinorityClassSize : nan, NaiveBayesAUC : nan, NaiveBayesErrRate : nan, NaiveBayesKappa : nan, NumberOfBinaryFeatures : 0.0, NumberOfClasses : 0.0, NumberOfFeatures : 6.0, NumberOfInstances : 100.0, NumberOfInstancesWithMissingValues : 0.0, NumberOfMissingValues : 0.0, NumberOfNumericFeatures : 6.0, NumberOfSymbolicFeatures : 0.0, PercentageOfBinaryFeatures : 0.0, PercentageOfInstancesWithMissingValues : 0.0, PercentageOfMissingValues : 0.0, PercentageOfNumericFeatures : 100.0, PercentageOfSymbolicFeatures : 0.0, Quartile1AttributeEntropy : nan, Quartile1KurtosisOfNumericAtts : -1.1854586273829244, Quartile1MeansOfNumericAtts : -2.455000023826592e-09, Quartile1MutualInformation : nan, Quartile1SkewnessOfNumericAtts : -0.13020953311533182, Quartile1StdDevOfNumericAtts : 0.9999999982648644, Quartile2AttributeEntropy : nan, Quartile2KurtosisOfNumericAtts : -0.9788320801085673, Quartile2MeansOfNumericAtts : -3.8649997424755624e-10, Quartile2MutualInformation : nan, Quartile2SkewnessOfNumericAtts : -0.05905904653100631, Quartile2StdDevOfNumericAtts : 1.0000000012145431, Quartile3AttributeEntropy : nan, Quartile3KurtosisOfNumericAtts : -0.8191420457216096, Quartile3MeansOfNumericAtts : 8.057500228489389e-10, Quartile3MutualInformation : nan, Quartile3SkewnessOfNumericAtts : 0.1188644804299394, Quartile3StdDevOfNumericAtts : 1.0000000023950186, REPTreeDepth1AUC : nan, REPTreeDepth1ErrRate : nan, REPTreeDepth1Kappa : nan, REPTreeDepth2AUC : nan, REPTreeDepth2ErrRate : nan, REPTreeDepth2Kappa : nan, REPTreeDepth3AUC : nan, REPTreeDepth3ErrRate : nan, REPTreeDepth3Kappa : nan, RandomTreeDepth1AUC : nan, RandomTreeDepth1ErrRate : nan, RandomTreeDepth1Kappa : nan, RandomTreeDepth2AUC : nan, RandomTreeDepth2ErrRate : nan, RandomTreeDepth2Kappa : nan, RandomTreeDepth3AUC : nan, RandomTreeDepth3ErrRate : nan, RandomTreeDepth3Kappa : nan, StdvNominalAttDistinctValues : nan, kNN1NAUC : nan, kNN1NErrRate : nan, kNN1NKappa : nan,',\n", + " 'status': 'active',\n", + " 'uploader': 2,\n", + " 'version': 1},\n", + " {'NumberOfClasses': 0.0,\n", + " 'NumberOfFeatures': 51.0,\n", + " 'NumberOfInstances': 500.0,\n", + " 'NumberOfInstancesWithMissingValues': 0.0,\n", + " 'NumberOfMissingValues': 0.0,\n", + " 'NumberOfNumericFeatures': 51.0,\n", + " 'NumberOfSymbolicFeatures': 0.0,\n", + " 'Unnamed: 0': 479,\n", + " 'description': '**Author**: \\n**Source**: Unknown - Date unknown \\n**Please cite**: \\n\\nThe Friedman datasets are 80 artificially generated datasets originating from: J.H. Friedman (1999). Stochastic Gradient Boosting\\n\\nThe dataset names are coded as \"fri_colinearintydegree_samplenumber_featurenumber\".\\n\\nFriedman is the one of the most used functions for data generation (Friedman, 1999). Friedman functions include both linear and non-linear relations between input and output, and a normalized noise (e) is added to the output. The Friedman function is as follows: \\n\\ny=10*sin(pi*x1*x2)+20*(x3-0.5)^2=10*X4+5*X5+e\\n\\nIn the original Friedman function, there are 5 features for input. To measure the effects of non-related features, additional features are added to the datasets. These added features are independent from the output. However, to measure the algorithm\\'s robustness to the colinearity, the datasets are generated with 5 different colinearity degrees. The colinearity degrees is the number of features depending on other features. \\n\\nThe generated Friedman dataset\\'s parameters and values are given below: \\nThe number of features: 5 10 25 50 100 (only the first 5 features are related to the output. The rest are completely random)\\nThe number of samples: 100 250 500 1000\\nColinearity degrees: 0 1 2 3 4\\nFor the datasets with colinearity degree 4, the numbers of features are generated as 10, 25, 50 and 100.\\nThe other datasets have 5, 10, 25 and 50 features.\\n\\nAs a result, 80 artificial datasets are generated by (4 different feature number * 4 different sample number * 5 different colinearity degree)\\n\\nThe last attribute in each file is the target.',\n", + " 'did': 626,\n", + " 'features': '0 : [0 - oz1 (numeric)], 1 : [1 - oz2 (numeric)], 2 : [2 - oz3 (numeric)], 3 : [3 - oz4 (numeric)], 4 : [4 - oz5 (numeric)], 5 : [5 - oz6 (numeric)], 6 : [6 - oz7 (numeric)], 7 : [7 - oz8 (numeric)], 8 : [8 - oz9 (numeric)], 9 : [9 - oz10 (numeric)], 10 : [10 - oz11 (numeric)], 11 : [11 - oz12 (numeric)], 12 : [12 - oz13 (numeric)], 13 : [13 - oz14 (numeric)], 14 : [14 - oz15 (numeric)], 15 : [15 - oz16 (numeric)], 16 : [16 - oz17 (numeric)], 17 : [17 - oz18 (numeric)], 18 : [18 - oz19 (numeric)], 19 : [19 - oz20 (numeric)], 20 : [20 - oz21 (numeric)], 21 : [21 - oz22 (numeric)], 22 : [22 - oz23 (numeric)], 23 : [23 - oz24 (numeric)], 24 : [24 - oz25 (numeric)], 25 : [25 - oz26 (numeric)], 26 : [26 - oz27 (numeric)], 27 : [27 - oz28 (numeric)], 28 : [28 - oz29 (numeric)], 29 : [29 - oz30 (numeric)], 30 : [30 - oz31 (numeric)], 31 : [31 - oz32 (numeric)], 32 : [32 - oz33 (numeric)], 33 : [33 - oz34 (numeric)], 34 : [34 - oz35 (numeric)], 35 : [35 - oz36 (numeric)], 36 : [36 - oz37 (numeric)], 37 : [37 - oz38 (numeric)], 38 : [38 - oz39 (numeric)], 39 : [39 - oz40 (numeric)], 40 : [40 - oz41 (numeric)], 41 : [41 - oz42 (numeric)], 42 : [42 - oz43 (numeric)], 43 : [43 - oz44 (numeric)], 44 : [44 - oz45 (numeric)], 45 : [45 - oz46 (numeric)], 46 : [46 - oz47 (numeric)], 47 : [47 - oz48 (numeric)], 48 : [48 - oz49 (numeric)], 49 : [49 - oz50 (numeric)], 50 : [50 - oz51 (numeric)],',\n", + " 'format': 'ARFF',\n", + " 'name': 'fri_c2_500_50',\n", + " 'qualities': 'AutoCorrelation : -0.1268945054000004, CfsSubsetEval_DecisionStumpAUC : nan, CfsSubsetEval_DecisionStumpErrRate : nan, CfsSubsetEval_DecisionStumpKappa : nan, CfsSubsetEval_NaiveBayesAUC : nan, CfsSubsetEval_NaiveBayesErrRate : nan, CfsSubsetEval_NaiveBayesKappa : nan, CfsSubsetEval_kNN1NAUC : nan, CfsSubsetEval_kNN1NErrRate : nan, CfsSubsetEval_kNN1NKappa : nan, ClassEntropy : nan, DecisionStumpAUC : nan, DecisionStumpErrRate : nan, DecisionStumpKappa : nan, Dimensionality : 0.102, EquivalentNumberOfAtts : nan, J48.00001.AUC : nan, J48.00001.ErrRate : nan, J48.00001.Kappa : nan, J48.0001.AUC : nan, J48.0001.ErrRate : nan, J48.0001.Kappa : nan, J48.001.AUC : nan, J48.001.ErrRate : nan, J48.001.Kappa : nan, MajorityClassPercentage : nan, MajorityClassSize : nan, MaxAttributeEntropy : nan, MaxKurtosisOfNumericAtts : 0.40050970922613294, MaxMeansOfNumericAtts : 1.7419999434054034e-09, MaxMutualInformation : nan, MaxNominalAttDistinctValues : nan, MaxSkewnessOfNumericAtts : 0.8257288820876238, MaxStdDevOfNumericAtts : 1.0000000017470212, MeanAttributeEntropy : nan, MeanKurtosisOfNumericAtts : -1.1476523531550133, MeanMeansOfNumericAtts : 1.186981529806204e-10, MeanMutualInformation : nan, MeanNoiseToSignalRatio : nan, MeanNominalAttDistinctValues : nan, MeanSkewnessOfNumericAtts : 0.015138124990254022, MeanStdDevOfNumericAtts : 0.9999999999150228, MinAttributeEntropy : nan, MinKurtosisOfNumericAtts : -1.3001476274593584, MinMeansOfNumericAtts : -1.673260040746527e-09, MinMutualInformation : nan, MinNominalAttDistinctValues : nan, MinSkewnessOfNumericAtts : -0.58640368879216, MinStdDevOfNumericAtts : 0.9999999975311687, MinorityClassPercentage : nan, MinorityClassSize : nan, NaiveBayesAUC : nan, NaiveBayesErrRate : nan, NaiveBayesKappa : nan, NumberOfBinaryFeatures : 0.0, NumberOfClasses : 0.0, NumberOfFeatures : 51.0, NumberOfInstances : 500.0, NumberOfInstancesWithMissingValues : 0.0, NumberOfMissingValues : 0.0, NumberOfNumericFeatures : 51.0, NumberOfSymbolicFeatures : 0.0, PercentageOfBinaryFeatures : 0.0, PercentageOfInstancesWithMissingValues : 0.0, PercentageOfMissingValues : 0.0, PercentageOfNumericFeatures : 100.0, PercentageOfSymbolicFeatures : 0.0, Quartile1AttributeEntropy : nan, Quartile1KurtosisOfNumericAtts : -1.2376778640025938, Quartile1MeansOfNumericAtts : -3.560000560454135e-10, Quartile1MutualInformation : nan, Quartile1SkewnessOfNumericAtts : -0.04717388403461312, Quartile1StdDevOfNumericAtts : 0.9999999993775461, Quartile2AttributeEntropy : nan, Quartile2KurtosisOfNumericAtts : -1.2049058684259772, Quartile2MeansOfNumericAtts : 6.987996861718138e-11, Quartile2MutualInformation : nan, Quartile2SkewnessOfNumericAtts : 0.011985080789238782, Quartile2StdDevOfNumericAtts : 1.0000000000834948, Quartile3AttributeEntropy : nan, Quartile3KurtosisOfNumericAtts : -1.1491180708665234, Quartile3MeansOfNumericAtts : 4.622000169263174e-10, Quartile3MutualInformation : nan, Quartile3SkewnessOfNumericAtts : 0.061014959488354, Quartile3StdDevOfNumericAtts : 1.0000000007198544, REPTreeDepth1AUC : nan, REPTreeDepth1ErrRate : nan, REPTreeDepth1Kappa : nan, REPTreeDepth2AUC : nan, REPTreeDepth2ErrRate : nan, REPTreeDepth2Kappa : nan, REPTreeDepth3AUC : nan, REPTreeDepth3ErrRate : nan, REPTreeDepth3Kappa : nan, RandomTreeDepth1AUC : nan, RandomTreeDepth1ErrRate : nan, RandomTreeDepth1Kappa : nan, RandomTreeDepth2AUC : nan, RandomTreeDepth2ErrRate : nan, RandomTreeDepth2Kappa : nan, RandomTreeDepth3AUC : nan, RandomTreeDepth3ErrRate : nan, RandomTreeDepth3Kappa : nan, StdvNominalAttDistinctValues : nan, kNN1NAUC : nan, kNN1NErrRate : nan, kNN1NKappa : nan,',\n", + " 'status': 'active',\n", + " 'uploader': 2,\n", + " 'version': 1},\n", + " {'NumberOfClasses': 0.0,\n", + " 'NumberOfFeatures': 11.0,\n", + " 'NumberOfInstances': 500.0,\n", + " 'NumberOfInstancesWithMissingValues': 0.0,\n", + " 'NumberOfMissingValues': 0.0,\n", + " 'NumberOfNumericFeatures': 11.0,\n", + " 'NumberOfSymbolicFeatures': 0.0,\n", + " 'Unnamed: 0': 480,\n", + " 'description': '**Author**: \\n**Source**: Unknown - Date unknown \\n**Please cite**: \\n\\nThe Friedman datasets are 80 artificially generated datasets originating from: J.H. Friedman (1999). Stochastic Gradient Boosting\\n\\nThe dataset names are coded as \"fri_colinearintydegree_samplenumber_featurenumber\".\\n\\nFriedman is the one of the most used functions for data generation (Friedman, 1999). Friedman functions include both linear and non-linear relations between input and output, and a normalized noise (e) is added to the output. The Friedman function is as follows: \\n\\ny=10*sin(pi*x1*x2)+20*(x3-0.5)^2=10*X4+5*X5+e\\n\\nIn the original Friedman function, there are 5 features for input. To measure the effects of non-related features, additional features are added to the datasets. These added features are independent from the output. However, to measure the algorithm\\'s robustness to the colinearity, the datasets are generated with 5 different colinearity degrees. The colinearity degrees is the number of features depending on other features. \\n\\nThe generated Friedman dataset\\'s parameters and values are given below: \\nThe number of features: 5 10 25 50 100 (only the first 5 features are related to the output. The rest are completely random)\\nThe number of samples: 100 250 500 1000\\nColinearity degrees: 0 1 2 3 4\\nFor the datasets with colinearity degree 4, the numbers of features are generated as 10, 25, 50 and 100.\\nThe other datasets have 5, 10, 25 and 50 features.\\n\\nAs a result, 80 artificial datasets are generated by (4 different feature number * 4 different sample number * 5 different colinearity degree)\\n\\nThe last attribute in each file is the target.',\n", + " 'did': 627,\n", + " 'features': '0 : [0 - oz1 (numeric)], 1 : [1 - oz2 (numeric)], 2 : [2 - oz3 (numeric)], 3 : [3 - oz4 (numeric)], 4 : [4 - oz5 (numeric)], 5 : [5 - oz6 (numeric)], 6 : [6 - oz7 (numeric)], 7 : [7 - oz8 (numeric)], 8 : [8 - oz9 (numeric)], 9 : [9 - oz10 (numeric)], 10 : [10 - oz11 (numeric)],',\n", + " 'format': 'ARFF',\n", + " 'name': 'fri_c2_500_10',\n", + " 'qualities': 'AutoCorrelation : -0.11550001540681405, CfsSubsetEval_DecisionStumpAUC : nan, CfsSubsetEval_DecisionStumpErrRate : nan, CfsSubsetEval_DecisionStumpKappa : nan, CfsSubsetEval_NaiveBayesAUC : nan, CfsSubsetEval_NaiveBayesErrRate : nan, CfsSubsetEval_NaiveBayesKappa : nan, CfsSubsetEval_kNN1NAUC : nan, CfsSubsetEval_kNN1NErrRate : nan, CfsSubsetEval_kNN1NKappa : nan, ClassEntropy : nan, DecisionStumpAUC : nan, DecisionStumpErrRate : nan, DecisionStumpKappa : nan, Dimensionality : 0.022, EquivalentNumberOfAtts : nan, J48.00001.AUC : nan, J48.00001.ErrRate : nan, J48.00001.Kappa : nan, J48.0001.AUC : nan, J48.0001.ErrRate : nan, J48.0001.Kappa : nan, J48.001.AUC : nan, J48.001.ErrRate : nan, J48.001.Kappa : nan, MajorityClassPercentage : nan, MajorityClassSize : nan, MaxAttributeEntropy : nan, MaxKurtosisOfNumericAtts : 0.38517831285595205, MaxMeansOfNumericAtts : 9.482000321581196e-10, MaxMutualInformation : nan, MaxNominalAttDistinctValues : nan, MaxSkewnessOfNumericAtts : 0.7972004342299485, MaxStdDevOfNumericAtts : 1.0000000018286623, MeanAttributeEntropy : nan, MeanKurtosisOfNumericAtts : -0.9569272304070873, MeanMeansOfNumericAtts : 8.195455355971981e-11, MeanMutualInformation : nan, MeanNoiseToSignalRatio : nan, MeanNominalAttDistinctValues : nan, MeanSkewnessOfNumericAtts : 0.04714906419946629, MeanStdDevOfNumericAtts : 0.9999999996969955, MinAttributeEntropy : nan, MinKurtosisOfNumericAtts : -1.2705783053188873, MinMeansOfNumericAtts : -1.1432000008859687e-09, MinMutualInformation : nan, MinNominalAttDistinctValues : nan, MinSkewnessOfNumericAtts : -0.6005201001977205, MinStdDevOfNumericAtts : 0.9999999981064633, MinorityClassPercentage : nan, MinorityClassSize : nan, NaiveBayesAUC : nan, NaiveBayesErrRate : nan, NaiveBayesKappa : nan, NumberOfBinaryFeatures : 0.0, NumberOfClasses : 0.0, NumberOfFeatures : 11.0, NumberOfInstances : 500.0, NumberOfInstancesWithMissingValues : 0.0, NumberOfMissingValues : 0.0, NumberOfNumericFeatures : 11.0, NumberOfSymbolicFeatures : 0.0, PercentageOfBinaryFeatures : 0.0, PercentageOfInstancesWithMissingValues : 0.0, PercentageOfMissingValues : 0.0, PercentageOfNumericFeatures : 100.0, PercentageOfSymbolicFeatures : 0.0, Quartile1AttributeEntropy : nan, Quartile1KurtosisOfNumericAtts : -1.2332327972724195, Quartile1MeansOfNumericAtts : -4.774000403884316e-10, Quartile1MutualInformation : nan, Quartile1SkewnessOfNumericAtts : -0.034254004941503026, Quartile1StdDevOfNumericAtts : 0.9999999984859435, Quartile2AttributeEntropy : nan, Quartile2KurtosisOfNumericAtts : -1.2099144610876031, Quartile2MeansOfNumericAtts : 9.659995803268374e-11, Quartile2MutualInformation : nan, Quartile2SkewnessOfNumericAtts : 0.06852484125977333, Quartile2StdDevOfNumericAtts : 0.9999999996020292, Quartile3AttributeEntropy : nan, Quartile3KurtosisOfNumericAtts : -0.7211901728348078, Quartile3MeansOfNumericAtts : 8.11800013877928e-10, Quartile3MutualInformation : nan, Quartile3SkewnessOfNumericAtts : 0.08999135279171033, Quartile3StdDevOfNumericAtts : 1.000000001072244, REPTreeDepth1AUC : nan, REPTreeDepth1ErrRate : nan, REPTreeDepth1Kappa : nan, REPTreeDepth2AUC : nan, REPTreeDepth2ErrRate : nan, REPTreeDepth2Kappa : nan, REPTreeDepth3AUC : nan, REPTreeDepth3ErrRate : nan, REPTreeDepth3Kappa : nan, RandomTreeDepth1AUC : nan, RandomTreeDepth1ErrRate : nan, RandomTreeDepth1Kappa : nan, RandomTreeDepth2AUC : nan, RandomTreeDepth2ErrRate : nan, RandomTreeDepth2Kappa : nan, RandomTreeDepth3AUC : nan, RandomTreeDepth3ErrRate : nan, RandomTreeDepth3Kappa : nan, StdvNominalAttDistinctValues : nan, kNN1NAUC : nan, kNN1NErrRate : nan, kNN1NKappa : nan,',\n", + " 'status': 'active',\n", + " 'uploader': 2,\n", + " 'version': 1},\n", + " {'NumberOfClasses': 0.0,\n", + " 'NumberOfFeatures': 6.0,\n", + " 'NumberOfInstances': 1000.0,\n", + " 'NumberOfInstancesWithMissingValues': 0.0,\n", + " 'NumberOfMissingValues': 0.0,\n", + " 'NumberOfNumericFeatures': 6.0,\n", + " 'NumberOfSymbolicFeatures': 0.0,\n", + " 'Unnamed: 0': 481,\n", + " 'description': '**Author**: \\n**Source**: Unknown - Date unknown \\n**Please cite**: \\n\\nThe Friedman datasets are 80 artificially generated datasets originating from: J.H. Friedman (1999). Stochastic Gradient Boosting\\n\\nThe dataset names are coded as \"fri_colinearintydegree_samplenumber_featurenumber\".\\n\\nFriedman is the one of the most used functions for data generation (Friedman, 1999). Friedman functions include both linear and non-linear relations between input and output, and a normalized noise (e) is added to the output. The Friedman function is as follows: \\n\\ny=10*sin(pi*x1*x2)+20*(x3-0.5)^2=10*X4+5*X5+e\\n\\nIn the original Friedman function, there are 5 features for input. To measure the effects of non-related features, additional features are added to the datasets. These added features are independent from the output. However, to measure the algorithm\\'s robustness to the colinearity, the datasets are generated with 5 different colinearity degrees. The colinearity degrees is the number of features depending on other features. \\n\\nThe generated Friedman dataset\\'s parameters and values are given below: \\nThe number of features: 5 10 25 50 100 (only the first 5 features are related to the output. The rest are completely random)\\nThe number of samples: 100 250 500 1000\\nColinearity degrees: 0 1 2 3 4\\nFor the datasets with colinearity degree 4, the numbers of features are generated as 10, 25, 50 and 100.\\nThe other datasets have 5, 10, 25 and 50 features.\\n\\nAs a result, 80 artificial datasets are generated by (4 different feature number * 4 different sample number * 5 different colinearity degree)\\n\\nThe last attribute in each file is the target.',\n", + " 'did': 628,\n", + " 'features': '0 : [0 - oz1 (numeric)], 1 : [1 - oz2 (numeric)], 2 : [2 - oz3 (numeric)], 3 : [3 - oz4 (numeric)], 4 : [4 - oz5 (numeric)], 5 : [5 - oz6 (numeric)],',\n", + " 'format': 'ARFF',\n", + " 'name': 'fri_c3_1000_5',\n", + " 'qualities': 'AutoCorrelation : -0.12870232880826782, CfsSubsetEval_DecisionStumpAUC : nan, CfsSubsetEval_DecisionStumpErrRate : nan, CfsSubsetEval_DecisionStumpKappa : nan, CfsSubsetEval_NaiveBayesAUC : nan, CfsSubsetEval_NaiveBayesErrRate : nan, CfsSubsetEval_NaiveBayesKappa : nan, CfsSubsetEval_kNN1NAUC : nan, CfsSubsetEval_kNN1NErrRate : nan, CfsSubsetEval_kNN1NKappa : nan, ClassEntropy : nan, DecisionStumpAUC : nan, DecisionStumpErrRate : nan, DecisionStumpKappa : nan, Dimensionality : 0.006, EquivalentNumberOfAtts : nan, J48.00001.AUC : nan, J48.00001.ErrRate : nan, J48.00001.Kappa : nan, J48.0001.AUC : nan, J48.0001.ErrRate : nan, J48.0001.Kappa : nan, J48.001.AUC : nan, J48.001.ErrRate : nan, J48.001.Kappa : nan, MajorityClassPercentage : nan, MajorityClassSize : nan, MaxAttributeEntropy : nan, MaxKurtosisOfNumericAtts : 1.1536224985085624, MaxMeansOfNumericAtts : 5.123000166307179e-10, MaxMutualInformation : nan, MaxNominalAttDistinctValues : nan, MaxSkewnessOfNumericAtts : 1.2469418106575223, MaxStdDevOfNumericAtts : 1.000000000656039, MeanAttributeEntropy : nan, MeanKurtosisOfNumericAtts : -0.3339947179631272, MeanMeansOfNumericAtts : -2.5390006153092067e-11, MeanMutualInformation : nan, MeanNoiseToSignalRatio : nan, MeanNominalAttDistinctValues : nan, MeanSkewnessOfNumericAtts : 0.27280678404407294, MeanStdDevOfNumericAtts : 0.9999999998249032, MinAttributeEntropy : nan, MinKurtosisOfNumericAtts : -1.1996531749727037, MinMeansOfNumericAtts : -6.798000173890983e-10, MinMutualInformation : nan, MinNominalAttDistinctValues : nan, MinSkewnessOfNumericAtts : -0.26268514717166463, MinStdDevOfNumericAtts : 0.9999999988017908, MinorityClassPercentage : nan, MinorityClassSize : nan, NaiveBayesAUC : nan, NaiveBayesErrRate : nan, NaiveBayesKappa : nan, NumberOfBinaryFeatures : 0.0, NumberOfClasses : 0.0, NumberOfFeatures : 6.0, NumberOfInstances : 1000.0, NumberOfInstancesWithMissingValues : 0.0, NumberOfMissingValues : 0.0, NumberOfNumericFeatures : 6.0, NumberOfSymbolicFeatures : 0.0, PercentageOfBinaryFeatures : 0.0, PercentageOfInstancesWithMissingValues : 0.0, PercentageOfMissingValues : 0.0, PercentageOfNumericFeatures : 100.0, PercentageOfSymbolicFeatures : 0.0, Quartile1AttributeEntropy : nan, Quartile1KurtosisOfNumericAtts : -1.1757650060563862, Quartile1MeansOfNumericAtts : -5.268525095636357e-10, Quartile1MutualInformation : nan, Quartile1SkewnessOfNumericAtts : -0.10150372560410345, Quartile1StdDevOfNumericAtts : 0.9999999989233246, Quartile2AttributeEntropy : nan, Quartile2KurtosisOfNumericAtts : -0.40648735723980156, Quartile2MeansOfNumericAtts : 8.164990517034463e-12, Quartile2MutualInformation : nan, Quartile2SkewnessOfNumericAtts : 0.003476917189752071, Quartile2StdDevOfNumericAtts : 1.0000000000893352, Quartile3AttributeEntropy : nan, Quartile3KurtosisOfNumericAtts : 0.30553514931408665, Quartile3MeansOfNumericAtts : 4.840999964783599e-10, Quartile3MutualInformation : nan, Quartile3SkewnessOfNumericAtts : 0.8317905462748747, Quartile3StdDevOfNumericAtts : 1.0000000004258218, REPTreeDepth1AUC : nan, REPTreeDepth1ErrRate : nan, REPTreeDepth1Kappa : nan, REPTreeDepth2AUC : nan, REPTreeDepth2ErrRate : nan, REPTreeDepth2Kappa : nan, REPTreeDepth3AUC : nan, REPTreeDepth3ErrRate : nan, REPTreeDepth3Kappa : nan, RandomTreeDepth1AUC : nan, RandomTreeDepth1ErrRate : nan, RandomTreeDepth1Kappa : nan, RandomTreeDepth2AUC : nan, RandomTreeDepth2ErrRate : nan, RandomTreeDepth2Kappa : nan, RandomTreeDepth3AUC : nan, RandomTreeDepth3ErrRate : nan, RandomTreeDepth3Kappa : nan, StdvNominalAttDistinctValues : nan, kNN1NAUC : nan, kNN1NErrRate : nan, kNN1NKappa : nan,',\n", + " 'status': 'active',\n", + " 'uploader': 2,\n", + " 'version': 1},\n", + " {'NumberOfClasses': 0.0,\n", + " 'NumberOfFeatures': 26.0,\n", + " 'NumberOfInstances': 100.0,\n", + " 'NumberOfInstancesWithMissingValues': 0.0,\n", + " 'NumberOfMissingValues': 0.0,\n", + " 'NumberOfNumericFeatures': 26.0,\n", + " 'NumberOfSymbolicFeatures': 0.0,\n", + " 'Unnamed: 0': 482,\n", + " 'description': '**Author**: \\n**Source**: Unknown - Date unknown \\n**Please cite**: \\n\\nThe Friedman datasets are 80 artificially generated datasets originating from: J.H. Friedman (1999). Stochastic Gradient Boosting\\n\\nThe dataset names are coded as \"fri_colinearintydegree_samplenumber_featurenumber\".\\n\\nFriedman is the one of the most used functions for data generation (Friedman, 1999). Friedman functions include both linear and non-linear relations between input and output, and a normalized noise (e) is added to the output. The Friedman function is as follows: \\n\\ny=10*sin(pi*x1*x2)+20*(x3-0.5)^2=10*X4+5*X5+e\\n\\nIn the original Friedman function, there are 5 features for input. To measure the effects of non-related features, additional features are added to the datasets. These added features are independent from the output. However, to measure the algorithm\\'s robustness to the colinearity, the datasets are generated with 5 different colinearity degrees. The colinearity degrees is the number of features depending on other features. \\n\\nThe generated Friedman dataset\\'s parameters and values are given below: \\nThe number of features: 5 10 25 50 100 (only the first 5 features are related to the output. The rest are completely random)\\nThe number of samples: 100 250 500 1000\\nColinearity degrees: 0 1 2 3 4\\nFor the datasets with colinearity degree 4, the numbers of features are generated as 10, 25, 50 and 100.\\nThe other datasets have 5, 10, 25 and 50 features.\\n\\nAs a result, 80 artificial datasets are generated by (4 different feature number * 4 different sample number * 5 different colinearity degree)\\n\\nThe last attribute in each file is the target.',\n", + " 'did': 629,\n", + " 'features': '0 : [0 - oz1 (numeric)], 1 : [1 - oz2 (numeric)], 2 : [2 - oz3 (numeric)], 3 : [3 - oz4 (numeric)], 4 : [4 - oz5 (numeric)], 5 : [5 - oz6 (numeric)], 6 : [6 - oz7 (numeric)], 7 : [7 - oz8 (numeric)], 8 : [8 - oz9 (numeric)], 9 : [9 - oz10 (numeric)], 10 : [10 - oz11 (numeric)], 11 : [11 - oz12 (numeric)], 12 : [12 - oz13 (numeric)], 13 : [13 - oz14 (numeric)], 14 : [14 - oz15 (numeric)], 15 : [15 - oz16 (numeric)], 16 : [16 - oz17 (numeric)], 17 : [17 - oz18 (numeric)], 18 : [18 - oz19 (numeric)], 19 : [19 - oz20 (numeric)], 20 : [20 - oz21 (numeric)], 21 : [21 - oz22 (numeric)], 22 : [22 - oz23 (numeric)], 23 : [23 - oz24 (numeric)], 24 : [24 - oz25 (numeric)], 25 : [25 - oz26 (numeric)],',\n", + " 'format': 'ARFF',\n", + " 'name': 'fri_c1_100_25',\n", + " 'qualities': 'AutoCorrelation : -0.09779439888888883, CfsSubsetEval_DecisionStumpAUC : nan, CfsSubsetEval_DecisionStumpErrRate : nan, CfsSubsetEval_DecisionStumpKappa : nan, CfsSubsetEval_NaiveBayesAUC : nan, CfsSubsetEval_NaiveBayesErrRate : nan, CfsSubsetEval_NaiveBayesKappa : nan, CfsSubsetEval_kNN1NAUC : nan, CfsSubsetEval_kNN1NErrRate : nan, CfsSubsetEval_kNN1NKappa : nan, ClassEntropy : nan, DecisionStumpAUC : nan, DecisionStumpErrRate : nan, DecisionStumpKappa : nan, Dimensionality : 0.26, EquivalentNumberOfAtts : nan, J48.00001.AUC : nan, J48.00001.ErrRate : nan, J48.00001.Kappa : nan, J48.0001.AUC : nan, J48.0001.ErrRate : nan, J48.0001.Kappa : nan, J48.001.AUC : nan, J48.001.ErrRate : nan, J48.001.Kappa : nan, MajorityClassPercentage : nan, MajorityClassSize : nan, MaxAttributeEntropy : nan, MaxKurtosisOfNumericAtts : -0.8167471085084324, MaxMeansOfNumericAtts : 2.9259999747427435e-09, MaxMutualInformation : nan, MaxNominalAttDistinctValues : nan, MaxSkewnessOfNumericAtts : 0.3253491953587132, MaxStdDevOfNumericAtts : 1.0000000033301368, MeanAttributeEntropy : nan, MeanKurtosisOfNumericAtts : -1.1067805258625716, MeanMeansOfNumericAtts : -1.5473070181696822e-10, MeanMutualInformation : nan, MeanNoiseToSignalRatio : nan, MeanNominalAttDistinctValues : nan, MeanSkewnessOfNumericAtts : -0.052514005600623095, MeanStdDevOfNumericAtts : 0.9999999992124979, MinAttributeEntropy : nan, MinKurtosisOfNumericAtts : -1.2581761797595237, MinMeansOfNumericAtts : -4.970000024862032e-09, MinMutualInformation : nan, MinNominalAttDistinctValues : nan, MinSkewnessOfNumericAtts : -0.3603839801343778, MinStdDevOfNumericAtts : 0.9999999960025994, MinorityClassPercentage : nan, MinorityClassSize : nan, NaiveBayesAUC : nan, NaiveBayesErrRate : nan, NaiveBayesKappa : nan, NumberOfBinaryFeatures : 0.0, NumberOfClasses : 0.0, NumberOfFeatures : 26.0, NumberOfInstances : 100.0, NumberOfInstancesWithMissingValues : 0.0, NumberOfMissingValues : 0.0, NumberOfNumericFeatures : 26.0, NumberOfSymbolicFeatures : 0.0, PercentageOfBinaryFeatures : 0.0, PercentageOfInstancesWithMissingValues : 0.0, PercentageOfMissingValues : 0.0, PercentageOfNumericFeatures : 100.0, PercentageOfSymbolicFeatures : 0.0, Quartile1AttributeEntropy : nan, Quartile1KurtosisOfNumericAtts : -1.1897722669281043, Quartile1MeansOfNumericAtts : -1.3124999958050166e-09, Quartile1MutualInformation : nan, Quartile1SkewnessOfNumericAtts : -0.17995230709375323, Quartile1StdDevOfNumericAtts : 0.9999999977387246, Quartile2AttributeEntropy : nan, Quartile2KurtosisOfNumericAtts : -1.1290672971179352, Quartile2MeansOfNumericAtts : 9.449996674071315e-11, Quartile2MutualInformation : nan, Quartile2SkewnessOfNumericAtts : -0.037178364724484256, Quartile2StdDevOfNumericAtts : 0.9999999989666395, Quartile3AttributeEntropy : nan, Quartile3KurtosisOfNumericAtts : -1.0502994145771845, Quartile3MeansOfNumericAtts : 1.5297500149946686e-09, Quartile3MutualInformation : nan, Quartile3SkewnessOfNumericAtts : 0.04009025014922827, Quartile3StdDevOfNumericAtts : 1.0000000010242138, REPTreeDepth1AUC : nan, REPTreeDepth1ErrRate : nan, REPTreeDepth1Kappa : nan, REPTreeDepth2AUC : nan, REPTreeDepth2ErrRate : nan, REPTreeDepth2Kappa : nan, REPTreeDepth3AUC : nan, REPTreeDepth3ErrRate : nan, REPTreeDepth3Kappa : nan, RandomTreeDepth1AUC : nan, RandomTreeDepth1ErrRate : nan, RandomTreeDepth1Kappa : nan, RandomTreeDepth2AUC : nan, RandomTreeDepth2ErrRate : nan, RandomTreeDepth2Kappa : nan, RandomTreeDepth3AUC : nan, RandomTreeDepth3ErrRate : nan, RandomTreeDepth3Kappa : nan, StdvNominalAttDistinctValues : nan, kNN1NAUC : nan, kNN1NErrRate : nan, kNN1NKappa : nan,',\n", + " 'status': 'active',\n", + " 'uploader': 2,\n", + " 'version': 1},\n", + " {'NumberOfClasses': 0.0,\n", + " 'NumberOfFeatures': 51.0,\n", + " 'NumberOfInstances': 250.0,\n", + " 'NumberOfInstancesWithMissingValues': 0.0,\n", + " 'NumberOfMissingValues': 0.0,\n", + " 'NumberOfNumericFeatures': 51.0,\n", + " 'NumberOfSymbolicFeatures': 0.0,\n", + " 'Unnamed: 0': 485,\n", + " 'description': '**Author**: \\n**Source**: Unknown - Date unknown \\n**Please cite**: \\n\\nThe Friedman datasets are 80 artificially generated datasets originating from: J.H. Friedman (1999). Stochastic Gradient Boosting\\n\\nThe dataset names are coded as \"fri_colinearintydegree_samplenumber_featurenumber\".\\n\\nFriedman is the one of the most used functions for data generation (Friedman, 1999). Friedman functions include both linear and non-linear relations between input and output, and a normalized noise (e) is added to the output. The Friedman function is as follows: \\n\\ny=10*sin(pi*x1*x2)+20*(x3-0.5)^2=10*X4+5*X5+e\\n\\nIn the original Friedman function, there are 5 features for input. To measure the effects of non-related features, additional features are added to the datasets. These added features are independent from the output. However, to measure the algorithm\\'s robustness to the colinearity, the datasets are generated with 5 different colinearity degrees. The colinearity degrees is the number of features depending on other features. \\n\\nThe generated Friedman dataset\\'s parameters and values are given below: \\nThe number of features: 5 10 25 50 100 (only the first 5 features are related to the output. The rest are completely random)\\nThe number of samples: 100 250 500 1000\\nColinearity degrees: 0 1 2 3 4\\nFor the datasets with colinearity degree 4, the numbers of features are generated as 10, 25, 50 and 100.\\nThe other datasets have 5, 10, 25 and 50 features.\\n\\nAs a result, 80 artificial datasets are generated by (4 different feature number * 4 different sample number * 5 different colinearity degree)\\n\\nThe last attribute in each file is the target.',\n", + " 'did': 632,\n", + " 'features': '0 : [0 - oz1 (numeric)], 1 : [1 - oz2 (numeric)], 2 : [2 - oz3 (numeric)], 3 : [3 - oz4 (numeric)], 4 : [4 - oz5 (numeric)], 5 : [5 - oz6 (numeric)], 6 : [6 - oz7 (numeric)], 7 : [7 - oz8 (numeric)], 8 : [8 - oz9 (numeric)], 9 : [9 - oz10 (numeric)], 10 : [10 - oz11 (numeric)], 11 : [11 - oz12 (numeric)], 12 : [12 - oz13 (numeric)], 13 : [13 - oz14 (numeric)], 14 : [14 - oz15 (numeric)], 15 : [15 - oz16 (numeric)], 16 : [16 - oz17 (numeric)], 17 : [17 - oz18 (numeric)], 18 : [18 - oz19 (numeric)], 19 : [19 - oz20 (numeric)], 20 : [20 - oz21 (numeric)], 21 : [21 - oz22 (numeric)], 22 : [22 - oz23 (numeric)], 23 : [23 - oz24 (numeric)], 24 : [24 - oz25 (numeric)], 25 : [25 - oz26 (numeric)], 26 : [26 - oz27 (numeric)], 27 : [27 - oz28 (numeric)], 28 : [28 - oz29 (numeric)], 29 : [29 - oz30 (numeric)], 30 : [30 - oz31 (numeric)], 31 : [31 - oz32 (numeric)], 32 : [32 - oz33 (numeric)], 33 : [33 - oz34 (numeric)], 34 : [34 - oz35 (numeric)], 35 : [35 - oz36 (numeric)], 36 : [36 - oz37 (numeric)], 37 : [37 - oz38 (numeric)], 38 : [38 - oz39 (numeric)], 39 : [39 - oz40 (numeric)], 40 : [40 - oz41 (numeric)], 41 : [41 - oz42 (numeric)], 42 : [42 - oz43 (numeric)], 43 : [43 - oz44 (numeric)], 44 : [44 - oz45 (numeric)], 45 : [45 - oz46 (numeric)], 46 : [46 - oz47 (numeric)], 47 : [47 - oz48 (numeric)], 48 : [48 - oz49 (numeric)], 49 : [49 - oz50 (numeric)], 50 : [50 - oz51 (numeric)],',\n", + " 'format': 'ARFF',\n", + " 'name': 'fri_c3_250_50',\n", + " 'qualities': 'AutoCorrelation : -0.06448036682730894, CfsSubsetEval_DecisionStumpAUC : nan, CfsSubsetEval_DecisionStumpErrRate : nan, CfsSubsetEval_DecisionStumpKappa : nan, CfsSubsetEval_NaiveBayesAUC : nan, CfsSubsetEval_NaiveBayesErrRate : nan, CfsSubsetEval_NaiveBayesKappa : nan, CfsSubsetEval_kNN1NAUC : nan, CfsSubsetEval_kNN1NErrRate : nan, CfsSubsetEval_kNN1NKappa : nan, ClassEntropy : nan, DecisionStumpAUC : nan, DecisionStumpErrRate : nan, DecisionStumpKappa : nan, Dimensionality : 0.204, EquivalentNumberOfAtts : nan, J48.00001.AUC : nan, J48.00001.ErrRate : nan, J48.00001.Kappa : nan, J48.0001.AUC : nan, J48.0001.ErrRate : nan, J48.0001.Kappa : nan, J48.001.AUC : nan, J48.001.ErrRate : nan, J48.001.Kappa : nan, MajorityClassPercentage : nan, MajorityClassSize : nan, MaxAttributeEntropy : nan, MaxKurtosisOfNumericAtts : 1.7495107166524035, MaxMeansOfNumericAtts : 2.5303999468678784e-09, MaxMutualInformation : nan, MaxNominalAttDistinctValues : nan, MaxSkewnessOfNumericAtts : 1.3694217971674387, MaxStdDevOfNumericAtts : 1.0000000035578938, MeanAttributeEntropy : nan, MeanKurtosisOfNumericAtts : -1.0687767190342374, MeanMeansOfNumericAtts : 2.1276815781402482e-10, MeanMutualInformation : nan, MeanNoiseToSignalRatio : nan, MeanNominalAttDistinctValues : nan, MeanSkewnessOfNumericAtts : 0.06788173473054655, MeanStdDevOfNumericAtts : 0.9999999997904486, MinAttributeEntropy : nan, MinKurtosisOfNumericAtts : -1.3143243017055757, MinMeansOfNumericAtts : -2.592000004852935e-09, MinMutualInformation : nan, MinNominalAttDistinctValues : nan, MinSkewnessOfNumericAtts : -0.1210807693154695, MinStdDevOfNumericAtts : 0.999999996033757, MinorityClassPercentage : nan, MinorityClassSize : nan, NaiveBayesAUC : nan, NaiveBayesErrRate : nan, NaiveBayesKappa : nan, NumberOfBinaryFeatures : 0.0, NumberOfClasses : 0.0, NumberOfFeatures : 51.0, NumberOfInstances : 250.0, NumberOfInstancesWithMissingValues : 0.0, NumberOfMissingValues : 0.0, NumberOfNumericFeatures : 51.0, NumberOfSymbolicFeatures : 0.0, PercentageOfBinaryFeatures : 0.0, PercentageOfInstancesWithMissingValues : 0.0, PercentageOfMissingValues : 0.0, PercentageOfNumericFeatures : 100.0, PercentageOfSymbolicFeatures : 0.0, Quartile1AttributeEntropy : nan, Quartile1KurtosisOfNumericAtts : -1.2388496054751004, Quartile1MeansOfNumericAtts : -1.991999845074588e-10, Quartile1MutualInformation : nan, Quartile1SkewnessOfNumericAtts : -0.028895106513236752, Quartile1StdDevOfNumericAtts : 0.9999999987411002, Quartile2AttributeEntropy : nan, Quartile2KurtosisOfNumericAtts : -1.1932137303225214, Quartile2MeansOfNumericAtts : 3.696000145936296e-10, Quartile2MutualInformation : nan, Quartile2SkewnessOfNumericAtts : 0.021333455299926984, Quartile2StdDevOfNumericAtts : 0.9999999997743819, Quartile3AttributeEntropy : nan, Quartile3KurtosisOfNumericAtts : -1.1469225547639952, Quartile3MeansOfNumericAtts : 8.612000073071612e-10, Quartile3MutualInformation : nan, Quartile3SkewnessOfNumericAtts : 0.08315248174950719, Quartile3StdDevOfNumericAtts : 1.0000000010301344, REPTreeDepth1AUC : nan, REPTreeDepth1ErrRate : nan, REPTreeDepth1Kappa : nan, REPTreeDepth2AUC : nan, REPTreeDepth2ErrRate : nan, REPTreeDepth2Kappa : nan, REPTreeDepth3AUC : nan, REPTreeDepth3ErrRate : nan, REPTreeDepth3Kappa : nan, RandomTreeDepth1AUC : nan, RandomTreeDepth1ErrRate : nan, RandomTreeDepth1Kappa : nan, RandomTreeDepth2AUC : nan, RandomTreeDepth2ErrRate : nan, RandomTreeDepth2Kappa : nan, RandomTreeDepth3AUC : nan, RandomTreeDepth3ErrRate : nan, RandomTreeDepth3Kappa : nan, StdvNominalAttDistinctValues : nan, kNN1NAUC : nan, kNN1NErrRate : nan, kNN1NKappa : nan,',\n", + " 'status': 'active',\n", + " 'uploader': 2,\n", + " 'version': 1},\n", + " {'NumberOfClasses': 0.0,\n", + " 'NumberOfFeatures': 26.0,\n", + " 'NumberOfInstances': 500.0,\n", + " 'NumberOfInstancesWithMissingValues': 0.0,\n", + " 'NumberOfMissingValues': 0.0,\n", + " 'NumberOfNumericFeatures': 26.0,\n", + " 'NumberOfSymbolicFeatures': 0.0,\n", + " 'Unnamed: 0': 486,\n", + " 'description': '**Author**: \\n**Source**: Unknown - Date unknown \\n**Please cite**: \\n\\nThe Friedman datasets are 80 artificially generated datasets originating from: J.H. Friedman (1999). Stochastic Gradient Boosting\\n\\nThe dataset names are coded as \"fri_colinearintydegree_samplenumber_featurenumber\".\\n\\nFriedman is the one of the most used functions for data generation (Friedman, 1999). Friedman functions include both linear and non-linear relations between input and output, and a normalized noise (e) is added to the output. The Friedman function is as follows: \\n\\ny=10*sin(pi*x1*x2)+20*(x3-0.5)^2=10*X4+5*X5+e\\n\\nIn the original Friedman function, there are 5 features for input. To measure the effects of non-related features, additional features are added to the datasets. These added features are independent from the output. However, to measure the algorithm\\'s robustness to the colinearity, the datasets are generated with 5 different colinearity degrees. The colinearity degrees is the number of features depending on other features. \\n\\nThe generated Friedman dataset\\'s parameters and values are given below: \\nThe number of features: 5 10 25 50 100 (only the first 5 features are related to the output. The rest are completely random)\\nThe number of samples: 100 250 500 1000\\nColinearity degrees: 0 1 2 3 4\\nFor the datasets with colinearity degree 4, the numbers of features are generated as 10, 25, 50 and 100.\\nThe other datasets have 5, 10, 25 and 50 features.\\n\\nAs a result, 80 artificial datasets are generated by (4 different feature number * 4 different sample number * 5 different colinearity degree)\\n\\nThe last attribute in each file is the target.',\n", + " 'did': 633,\n", + " 'features': '0 : [0 - oz1 (numeric)], 1 : [1 - oz2 (numeric)], 2 : [2 - oz3 (numeric)], 3 : [3 - oz4 (numeric)], 4 : [4 - oz5 (numeric)], 5 : [5 - oz6 (numeric)], 6 : [6 - oz7 (numeric)], 7 : [7 - oz8 (numeric)], 8 : [8 - oz9 (numeric)], 9 : [9 - oz10 (numeric)], 10 : [10 - oz11 (numeric)], 11 : [11 - oz12 (numeric)], 12 : [12 - oz13 (numeric)], 13 : [13 - oz14 (numeric)], 14 : [14 - oz15 (numeric)], 15 : [15 - oz16 (numeric)], 16 : [16 - oz17 (numeric)], 17 : [17 - oz18 (numeric)], 18 : [18 - oz19 (numeric)], 19 : [19 - oz20 (numeric)], 20 : [20 - oz21 (numeric)], 21 : [21 - oz22 (numeric)], 22 : [22 - oz23 (numeric)], 23 : [23 - oz24 (numeric)], 24 : [24 - oz25 (numeric)], 25 : [25 - oz26 (numeric)],',\n", + " 'format': 'ARFF',\n", + " 'name': 'fri_c0_500_25',\n", + " 'qualities': 'AutoCorrelation : -0.13036079975631423, CfsSubsetEval_DecisionStumpAUC : nan, CfsSubsetEval_DecisionStumpErrRate : nan, CfsSubsetEval_DecisionStumpKappa : nan, CfsSubsetEval_NaiveBayesAUC : nan, CfsSubsetEval_NaiveBayesErrRate : nan, CfsSubsetEval_NaiveBayesKappa : nan, CfsSubsetEval_kNN1NAUC : nan, CfsSubsetEval_kNN1NErrRate : nan, CfsSubsetEval_kNN1NKappa : nan, ClassEntropy : nan, DecisionStumpAUC : nan, DecisionStumpErrRate : nan, DecisionStumpKappa : nan, Dimensionality : 0.052, EquivalentNumberOfAtts : nan, J48.00001.AUC : nan, J48.00001.ErrRate : nan, J48.00001.Kappa : nan, J48.0001.AUC : nan, J48.0001.ErrRate : nan, J48.0001.Kappa : nan, J48.001.AUC : nan, J48.001.ErrRate : nan, J48.001.Kappa : nan, MajorityClassPercentage : nan, MajorityClassSize : nan, MaxAttributeEntropy : nan, MaxKurtosisOfNumericAtts : -0.4449234723732558, MaxMeansOfNumericAtts : 1.1983999832487767e-09, MaxMutualInformation : nan, MaxNominalAttDistinctValues : nan, MaxSkewnessOfNumericAtts : 0.14645379310596413, MaxStdDevOfNumericAtts : 1.0000000018816029, MeanAttributeEntropy : nan, MeanKurtosisOfNumericAtts : -1.152024648303206, MeanMeansOfNumericAtts : -2.599976863291677e-10, MeanMutualInformation : nan, MeanNoiseToSignalRatio : nan, MeanNominalAttDistinctValues : nan, MeanSkewnessOfNumericAtts : 0.01954441443739426, MeanStdDevOfNumericAtts : 0.9999999999991531, MinAttributeEntropy : nan, MinKurtosisOfNumericAtts : -1.3039901029862297, MinMeansOfNumericAtts : -1.2978600221202895e-09, MinMutualInformation : nan, MinNominalAttDistinctValues : nan, MinSkewnessOfNumericAtts : -0.14502350732769076, MinStdDevOfNumericAtts : 0.9999999966562448, MinorityClassPercentage : nan, MinorityClassSize : nan, NaiveBayesAUC : nan, NaiveBayesErrRate : nan, NaiveBayesKappa : nan, NumberOfBinaryFeatures : 0.0, NumberOfClasses : 0.0, NumberOfFeatures : 26.0, NumberOfInstances : 500.0, NumberOfInstancesWithMissingValues : 0.0, NumberOfMissingValues : 0.0, NumberOfNumericFeatures : 26.0, NumberOfSymbolicFeatures : 0.0, PercentageOfBinaryFeatures : 0.0, PercentageOfInstancesWithMissingValues : 0.0, PercentageOfMissingValues : 0.0, PercentageOfNumericFeatures : 100.0, PercentageOfSymbolicFeatures : 0.0, Quartile1AttributeEntropy : nan, Quartile1KurtosisOfNumericAtts : -1.2168936270698305, Quartile1MeansOfNumericAtts : -7.774499892199138e-10, Quartile1MutualInformation : nan, Quartile1SkewnessOfNumericAtts : -0.020749712473178403, Quartile1StdDevOfNumericAtts : 0.9999999994199817, Quartile2AttributeEntropy : nan, Quartile2KurtosisOfNumericAtts : -1.1817661435374496, Quartile2MeansOfNumericAtts : -3.609899614787082e-10, Quartile2MutualInformation : nan, Quartile2SkewnessOfNumericAtts : 0.01693542242465498, Quartile2StdDevOfNumericAtts : 1.0000000001767857, Quartile3AttributeEntropy : nan, Quartile3KurtosisOfNumericAtts : -1.1465827310167538, Quartile3MeansOfNumericAtts : 9.285000837633285e-11, Quartile3MutualInformation : nan, Quartile3SkewnessOfNumericAtts : 0.06858896650376467, Quartile3StdDevOfNumericAtts : 1.0000000006976897, REPTreeDepth1AUC : nan, REPTreeDepth1ErrRate : nan, REPTreeDepth1Kappa : nan, REPTreeDepth2AUC : nan, REPTreeDepth2ErrRate : nan, REPTreeDepth2Kappa : nan, REPTreeDepth3AUC : nan, REPTreeDepth3ErrRate : nan, REPTreeDepth3Kappa : nan, RandomTreeDepth1AUC : nan, RandomTreeDepth1ErrRate : nan, RandomTreeDepth1Kappa : nan, RandomTreeDepth2AUC : nan, RandomTreeDepth2ErrRate : nan, RandomTreeDepth2Kappa : nan, RandomTreeDepth3AUC : nan, RandomTreeDepth3ErrRate : nan, RandomTreeDepth3Kappa : nan, StdvNominalAttDistinctValues : nan, kNN1NAUC : nan, kNN1NErrRate : nan, kNN1NKappa : nan,',\n", + " 'status': 'active',\n", + " 'uploader': 2,\n", + " 'version': 1},\n", + " {'NumberOfClasses': 0.0,\n", + " 'NumberOfFeatures': 11.0,\n", + " 'NumberOfInstances': 100.0,\n", + " 'NumberOfInstancesWithMissingValues': 0.0,\n", + " 'NumberOfMissingValues': 0.0,\n", + " 'NumberOfNumericFeatures': 11.0,\n", + " 'NumberOfSymbolicFeatures': 0.0,\n", + " 'Unnamed: 0': 487,\n", + " 'description': '**Author**: \\n**Source**: Unknown - Date unknown \\n**Please cite**: \\n\\nThe Friedman datasets are 80 artificially generated datasets originating from: J.H. Friedman (1999). Stochastic Gradient Boosting\\n\\nThe dataset names are coded as \"fri_colinearintydegree_samplenumber_featurenumber\".\\n\\nFriedman is the one of the most used functions for data generation (Friedman, 1999). Friedman functions include both linear and non-linear relations between input and output, and a normalized noise (e) is added to the output. The Friedman function is as follows: \\n\\ny=10*sin(pi*x1*x2)+20*(x3-0.5)^2=10*X4+5*X5+e\\n\\nIn the original Friedman function, there are 5 features for input. To measure the effects of non-related features, additional features are added to the datasets. These added features are independent from the output. However, to measure the algorithm\\'s robustness to the colinearity, the datasets are generated with 5 different colinearity degrees. The colinearity degrees is the number of features depending on other features. \\n\\nThe generated Friedman dataset\\'s parameters and values are given below: \\nThe number of features: 5 10 25 50 100 (only the first 5 features are related to the output. The rest are completely random)\\nThe number of samples: 100 250 500 1000\\nColinearity degrees: 0 1 2 3 4\\nFor the datasets with colinearity degree 4, the numbers of features are generated as 10, 25, 50 and 100.\\nThe other datasets have 5, 10, 25 and 50 features.\\n\\nAs a result, 80 artificial datasets are generated by (4 different feature number * 4 different sample number * 5 different colinearity degree)\\n\\nThe last attribute in each file is the target.',\n", + " 'did': 634,\n", + " 'features': '0 : [0 - oz1 (numeric)], 1 : [1 - oz2 (numeric)], 2 : [2 - oz3 (numeric)], 3 : [3 - oz4 (numeric)], 4 : [4 - oz5 (numeric)], 5 : [5 - oz6 (numeric)], 6 : [6 - oz7 (numeric)], 7 : [7 - oz8 (numeric)], 8 : [8 - oz9 (numeric)], 9 : [9 - oz10 (numeric)], 10 : [10 - oz11 (numeric)],',\n", + " 'format': 'ARFF',\n", + " 'name': 'fri_c2_100_10',\n", + " 'qualities': 'AutoCorrelation : -0.05580917017171747, CfsSubsetEval_DecisionStumpAUC : nan, CfsSubsetEval_DecisionStumpErrRate : nan, CfsSubsetEval_DecisionStumpKappa : nan, CfsSubsetEval_NaiveBayesAUC : nan, CfsSubsetEval_NaiveBayesErrRate : nan, CfsSubsetEval_NaiveBayesKappa : nan, CfsSubsetEval_kNN1NAUC : nan, CfsSubsetEval_kNN1NErrRate : nan, CfsSubsetEval_kNN1NKappa : nan, ClassEntropy : nan, DecisionStumpAUC : nan, DecisionStumpErrRate : nan, DecisionStumpKappa : nan, Dimensionality : 0.11, EquivalentNumberOfAtts : nan, J48.00001.AUC : nan, J48.00001.ErrRate : nan, J48.00001.Kappa : nan, J48.0001.AUC : nan, J48.0001.ErrRate : nan, J48.0001.Kappa : nan, J48.001.AUC : nan, J48.001.ErrRate : nan, J48.001.Kappa : nan, MajorityClassPercentage : nan, MajorityClassSize : nan, MaxAttributeEntropy : nan, MaxKurtosisOfNumericAtts : 0.8250263918664871, MaxMeansOfNumericAtts : 4.720000021385396e-09, MaxMutualInformation : nan, MaxNominalAttDistinctValues : nan, MaxSkewnessOfNumericAtts : 0.901630521606414, MaxStdDevOfNumericAtts : 1.0000000025817553, MeanAttributeEntropy : nan, MeanKurtosisOfNumericAtts : -0.8873727255853247, MeanMeansOfNumericAtts : 1.0969327334343534e-09, MeanMutualInformation : nan, MeanNoiseToSignalRatio : nan, MeanNominalAttDistinctValues : nan, MeanSkewnessOfNumericAtts : 0.11144929055475318, MeanStdDevOfNumericAtts : 0.9999999998049834, MinAttributeEntropy : nan, MinKurtosisOfNumericAtts : -1.3569116235387846, MinMeansOfNumericAtts : -1.8039999971630749e-09, MinMutualInformation : nan, MinNominalAttDistinctValues : nan, MinSkewnessOfNumericAtts : -0.46226756651637757, MinStdDevOfNumericAtts : 0.9999999964959101, MinorityClassPercentage : nan, MinorityClassSize : nan, NaiveBayesAUC : nan, NaiveBayesErrRate : nan, NaiveBayesKappa : nan, NumberOfBinaryFeatures : 0.0, NumberOfClasses : 0.0, NumberOfFeatures : 11.0, NumberOfInstances : 100.0, NumberOfInstancesWithMissingValues : 0.0, NumberOfMissingValues : 0.0, NumberOfNumericFeatures : 11.0, NumberOfSymbolicFeatures : 0.0, PercentageOfBinaryFeatures : 0.0, PercentageOfInstancesWithMissingValues : 0.0, PercentageOfMissingValues : 0.0, PercentageOfNumericFeatures : 100.0, PercentageOfSymbolicFeatures : 0.0, Quartile1AttributeEntropy : nan, Quartile1KurtosisOfNumericAtts : -1.2957269629589532, Quartile1MeansOfNumericAtts : -1.4674000903269757e-10, Quartile1MutualInformation : nan, Quartile1SkewnessOfNumericAtts : -0.07348100988853508, Quartile1StdDevOfNumericAtts : 0.9999999978517549, Quartile2AttributeEntropy : nan, Quartile2KurtosisOfNumericAtts : -1.124085302153607, Quartile2MeansOfNumericAtts : 8.399999806840696e-10, Quartile2MutualInformation : nan, Quartile2SkewnessOfNumericAtts : 0.09329535632550585, Quartile2StdDevOfNumericAtts : 0.9999999993175158, Quartile3AttributeEntropy : nan, Quartile3KurtosisOfNumericAtts : -0.4607984092143367, Quartile3MeansOfNumericAtts : 2.44999999399198e-09, Quartile3MutualInformation : nan, Quartile3SkewnessOfNumericAtts : 0.2637436042765021, Quartile3StdDevOfNumericAtts : 1.0000000022712177, REPTreeDepth1AUC : nan, REPTreeDepth1ErrRate : nan, REPTreeDepth1Kappa : nan, REPTreeDepth2AUC : nan, REPTreeDepth2ErrRate : nan, REPTreeDepth2Kappa : nan, REPTreeDepth3AUC : nan, REPTreeDepth3ErrRate : nan, REPTreeDepth3Kappa : nan, RandomTreeDepth1AUC : nan, RandomTreeDepth1ErrRate : nan, RandomTreeDepth1Kappa : nan, RandomTreeDepth2AUC : nan, RandomTreeDepth2ErrRate : nan, RandomTreeDepth2Kappa : nan, RandomTreeDepth3AUC : nan, RandomTreeDepth3ErrRate : nan, RandomTreeDepth3Kappa : nan, StdvNominalAttDistinctValues : nan, kNN1NAUC : nan, kNN1NErrRate : nan, kNN1NKappa : nan,',\n", + " 'status': 'active',\n", + " 'uploader': 2,\n", + " 'version': 1},\n", + " {'NumberOfClasses': 0.0,\n", + " 'NumberOfFeatures': 11.0,\n", + " 'NumberOfInstances': 250.0,\n", + " 'NumberOfInstancesWithMissingValues': 0.0,\n", + " 'NumberOfMissingValues': 0.0,\n", + " 'NumberOfNumericFeatures': 11.0,\n", + " 'NumberOfSymbolicFeatures': 0.0,\n", + " 'Unnamed: 0': 488,\n", + " 'description': '**Author**: \\n**Source**: Unknown - Date unknown \\n**Please cite**: \\n\\nThe Friedman datasets are 80 artificially generated datasets originating from: J.H. Friedman (1999). Stochastic Gradient Boosting\\n\\nThe dataset names are coded as \"fri_colinearintydegree_samplenumber_featurenumber\".\\n\\nFriedman is the one of the most used functions for data generation (Friedman, 1999). Friedman functions include both linear and non-linear relations between input and output, and a normalized noise (e) is added to the output. The Friedman function is as follows: \\n\\ny=10*sin(pi*x1*x2)+20*(x3-0.5)^2=10*X4+5*X5+e\\n\\nIn the original Friedman function, there are 5 features for input. To measure the effects of non-related features, additional features are added to the datasets. These added features are independent from the output. However, to measure the algorithm\\'s robustness to the colinearity, the datasets are generated with 5 different colinearity degrees. The colinearity degrees is the number of features depending on other features. \\n\\nThe generated Friedman dataset\\'s parameters and values are given below: \\nThe number of features: 5 10 25 50 100 (only the first 5 features are related to the output. The rest are completely random)\\nThe number of samples: 100 250 500 1000\\nColinearity degrees: 0 1 2 3 4\\nFor the datasets with colinearity degree 4, the numbers of features are generated as 10, 25, 50 and 100.\\nThe other datasets have 5, 10, 25 and 50 features.\\n\\nAs a result, 80 artificial datasets are generated by (4 different feature number * 4 different sample number * 5 different colinearity degree)\\n\\nThe last attribute in each file is the target.',\n", + " 'did': 635,\n", + " 'features': '0 : [0 - oz1 (numeric)], 1 : [1 - oz2 (numeric)], 2 : [2 - oz3 (numeric)], 3 : [3 - oz4 (numeric)], 4 : [4 - oz5 (numeric)], 5 : [5 - oz6 (numeric)], 6 : [6 - oz7 (numeric)], 7 : [7 - oz8 (numeric)], 8 : [8 - oz9 (numeric)], 9 : [9 - oz10 (numeric)], 10 : [10 - oz11 (numeric)],',\n", + " 'format': 'ARFF',\n", + " 'name': 'fri_c0_250_10',\n", + " 'qualities': 'AutoCorrelation : -0.19571919690361395, CfsSubsetEval_DecisionStumpAUC : nan, CfsSubsetEval_DecisionStumpErrRate : nan, CfsSubsetEval_DecisionStumpKappa : nan, CfsSubsetEval_NaiveBayesAUC : nan, CfsSubsetEval_NaiveBayesErrRate : nan, CfsSubsetEval_NaiveBayesKappa : nan, CfsSubsetEval_kNN1NAUC : nan, CfsSubsetEval_kNN1NErrRate : nan, CfsSubsetEval_kNN1NKappa : nan, ClassEntropy : nan, DecisionStumpAUC : nan, DecisionStumpErrRate : nan, DecisionStumpKappa : nan, Dimensionality : 0.044, EquivalentNumberOfAtts : nan, J48.00001.AUC : nan, J48.00001.ErrRate : nan, J48.00001.Kappa : nan, J48.0001.AUC : nan, J48.0001.ErrRate : nan, J48.0001.Kappa : nan, J48.001.AUC : nan, J48.001.ErrRate : nan, J48.001.Kappa : nan, MajorityClassPercentage : nan, MajorityClassSize : nan, MaxAttributeEntropy : nan, MaxKurtosisOfNumericAtts : -0.49539348492487134, MaxMeansOfNumericAtts : 9.80799973432367e-10, MaxMutualInformation : nan, MaxNominalAttDistinctValues : nan, MaxSkewnessOfNumericAtts : 0.12543681897161996, MaxStdDevOfNumericAtts : 1.0000000034055463, MeanAttributeEntropy : nan, MeanKurtosisOfNumericAtts : -1.091995458400567, MeanMeansOfNumericAtts : -5.895709083386154e-10, MeanMutualInformation : nan, MeanNoiseToSignalRatio : nan, MeanNominalAttDistinctValues : nan, MeanSkewnessOfNumericAtts : -0.020703234788118627, MeanStdDevOfNumericAtts : 0.9999999992112348, MinAttributeEntropy : nan, MinKurtosisOfNumericAtts : -1.2642171132325368, MinMeansOfNumericAtts : -1.7864000378953195e-09, MinMutualInformation : nan, MinNominalAttDistinctValues : nan, MinSkewnessOfNumericAtts : -0.15693574724850204, MinStdDevOfNumericAtts : 0.9999999965931693, MinorityClassPercentage : nan, MinorityClassSize : nan, NaiveBayesAUC : nan, NaiveBayesErrRate : nan, NaiveBayesKappa : nan, NumberOfBinaryFeatures : 0.0, NumberOfClasses : 0.0, NumberOfFeatures : 11.0, NumberOfInstances : 250.0, NumberOfInstancesWithMissingValues : 0.0, NumberOfMissingValues : 0.0, NumberOfNumericFeatures : 11.0, NumberOfSymbolicFeatures : 0.0, PercentageOfBinaryFeatures : 0.0, PercentageOfInstancesWithMissingValues : 0.0, PercentageOfMissingValues : 0.0, PercentageOfNumericFeatures : 100.0, PercentageOfSymbolicFeatures : 0.0, Quartile1AttributeEntropy : nan, Quartile1KurtosisOfNumericAtts : -1.1866639579972689, Quartile1MeansOfNumericAtts : -1.351999991627828e-09, Quartile1MutualInformation : nan, Quartile1SkewnessOfNumericAtts : -0.07245627680151549, Quartile1StdDevOfNumericAtts : 0.9999999975463832, Quartile2AttributeEntropy : nan, Quartile2KurtosisOfNumericAtts : -1.1448645319602913, Quartile2MeansOfNumericAtts : -8.391999681833796e-10, Quartile2MutualInformation : nan, Quartile2SkewnessOfNumericAtts : -0.03516477227398804, Quartile2StdDevOfNumericAtts : 0.99999999894645, Quartile3AttributeEntropy : nan, Quartile3KurtosisOfNumericAtts : -1.0684550808914777, Quartile3MeansOfNumericAtts : 1.7840003208036136e-10, Quartile3MutualInformation : nan, Quartile3SkewnessOfNumericAtts : 0.062462108917709154, Quartile3StdDevOfNumericAtts : 0.9999999999553407, REPTreeDepth1AUC : nan, REPTreeDepth1ErrRate : nan, REPTreeDepth1Kappa : nan, REPTreeDepth2AUC : nan, REPTreeDepth2ErrRate : nan, REPTreeDepth2Kappa : nan, REPTreeDepth3AUC : nan, REPTreeDepth3ErrRate : nan, REPTreeDepth3Kappa : nan, RandomTreeDepth1AUC : nan, RandomTreeDepth1ErrRate : nan, RandomTreeDepth1Kappa : nan, RandomTreeDepth2AUC : nan, RandomTreeDepth2ErrRate : nan, RandomTreeDepth2Kappa : nan, RandomTreeDepth3AUC : nan, RandomTreeDepth3ErrRate : nan, RandomTreeDepth3Kappa : nan, StdvNominalAttDistinctValues : nan, kNN1NAUC : nan, kNN1NErrRate : nan, kNN1NKappa : nan,',\n", + " 'status': 'active',\n", + " 'uploader': 2,\n", + " 'version': 1},\n", + " {'NumberOfClasses': 0.0,\n", + " 'NumberOfFeatures': 51.0,\n", + " 'NumberOfInstances': 100.0,\n", + " 'NumberOfInstancesWithMissingValues': 0.0,\n", + " 'NumberOfMissingValues': 0.0,\n", + " 'NumberOfNumericFeatures': 51.0,\n", + " 'NumberOfSymbolicFeatures': 0.0,\n", + " 'Unnamed: 0': 489,\n", + " 'description': '**Author**: \\n**Source**: Unknown - Date unknown \\n**Please cite**: \\n\\nThe Friedman datasets are 80 artificially generated datasets originating from: J.H. Friedman (1999). Stochastic Gradient Boosting\\n\\nThe dataset names are coded as \"fri_colinearintydegree_samplenumber_featurenumber\".\\n\\nFriedman is the one of the most used functions for data generation (Friedman, 1999). Friedman functions include both linear and non-linear relations between input and output, and a normalized noise (e) is added to the output. The Friedman function is as follows: \\n\\ny=10*sin(pi*x1*x2)+20*(x3-0.5)^2=10*X4+5*X5+e\\n\\nIn the original Friedman function, there are 5 features for input. To measure the effects of non-related features, additional features are added to the datasets. These added features are independent from the output. However, to measure the algorithm\\'s robustness to the colinearity, the datasets are generated with 5 different colinearity degrees. The colinearity degrees is the number of features depending on other features. \\n\\nThe generated Friedman dataset\\'s parameters and values are given below: \\nThe number of features: 5 10 25 50 100 (only the first 5 features are related to the output. The rest are completely random)\\nThe number of samples: 100 250 500 1000\\nColinearity degrees: 0 1 2 3 4\\nFor the datasets with colinearity degree 4, the numbers of features are generated as 10, 25, 50 and 100.\\nThe other datasets have 5, 10, 25 and 50 features.\\n\\nAs a result, 80 artificial datasets are generated by (4 different feature number * 4 different sample number * 5 different colinearity degree)\\n\\nThe last attribute in each file is the target.',\n", + " 'did': 636,\n", + " 'features': '0 : [0 - oz1 (numeric)], 1 : [1 - oz2 (numeric)], 2 : [2 - oz3 (numeric)], 3 : [3 - oz4 (numeric)], 4 : [4 - oz5 (numeric)], 5 : [5 - oz6 (numeric)], 6 : [6 - oz7 (numeric)], 7 : [7 - oz8 (numeric)], 8 : [8 - oz9 (numeric)], 9 : [9 - oz10 (numeric)], 10 : [10 - oz11 (numeric)], 11 : [11 - oz12 (numeric)], 12 : [12 - oz13 (numeric)], 13 : [13 - oz14 (numeric)], 14 : [14 - oz15 (numeric)], 15 : [15 - oz16 (numeric)], 16 : [16 - oz17 (numeric)], 17 : [17 - oz18 (numeric)], 18 : [18 - oz19 (numeric)], 19 : [19 - oz20 (numeric)], 20 : [20 - oz21 (numeric)], 21 : [21 - oz22 (numeric)], 22 : [22 - oz23 (numeric)], 23 : [23 - oz24 (numeric)], 24 : [24 - oz25 (numeric)], 25 : [25 - oz26 (numeric)], 26 : [26 - oz27 (numeric)], 27 : [27 - oz28 (numeric)], 28 : [28 - oz29 (numeric)], 29 : [29 - oz30 (numeric)], 30 : [30 - oz31 (numeric)], 31 : [31 - oz32 (numeric)], 32 : [32 - oz33 (numeric)], 33 : [33 - oz34 (numeric)], 34 : [34 - oz35 (numeric)], 35 : [35 - oz36 (numeric)], 36 : [36 - oz37 (numeric)], 37 : [37 - oz38 (numeric)], 38 : [38 - oz39 (numeric)], 39 : [39 - oz40 (numeric)], 40 : [40 - oz41 (numeric)], 41 : [41 - oz42 (numeric)], 42 : [42 - oz43 (numeric)], 43 : [43 - oz44 (numeric)], 44 : [44 - oz45 (numeric)], 45 : [45 - oz46 (numeric)], 46 : [46 - oz47 (numeric)], 47 : [47 - oz48 (numeric)], 48 : [48 - oz49 (numeric)], 49 : [49 - oz50 (numeric)], 50 : [50 - oz51 (numeric)],',\n", + " 'format': 'ARFF',\n", + " 'name': 'fri_c1_100_50',\n", + " 'qualities': 'AutoCorrelation : -0.15358441155555566, CfsSubsetEval_DecisionStumpAUC : nan, CfsSubsetEval_DecisionStumpErrRate : nan, CfsSubsetEval_DecisionStumpKappa : nan, CfsSubsetEval_NaiveBayesAUC : nan, CfsSubsetEval_NaiveBayesErrRate : nan, CfsSubsetEval_NaiveBayesKappa : nan, CfsSubsetEval_kNN1NAUC : nan, CfsSubsetEval_kNN1NErrRate : nan, CfsSubsetEval_kNN1NKappa : nan, ClassEntropy : nan, DecisionStumpAUC : nan, DecisionStumpErrRate : nan, DecisionStumpKappa : nan, Dimensionality : 0.51, EquivalentNumberOfAtts : nan, J48.00001.AUC : nan, J48.00001.ErrRate : nan, J48.00001.Kappa : nan, J48.0001.AUC : nan, J48.0001.ErrRate : nan, J48.0001.Kappa : nan, J48.001.AUC : nan, J48.001.ErrRate : nan, J48.001.Kappa : nan, MajorityClassPercentage : nan, MajorityClassSize : nan, MaxAttributeEntropy : nan, MaxKurtosisOfNumericAtts : -0.6068823154055947, MaxMeansOfNumericAtts : 3.939999986268816e-09, MaxMutualInformation : nan, MaxNominalAttDistinctValues : nan, MaxSkewnessOfNumericAtts : 0.30742283514774926, MaxStdDevOfNumericAtts : 1.0000000036509502, MeanAttributeEntropy : nan, MeanKurtosisOfNumericAtts : -1.1445225066134481, MeanMeansOfNumericAtts : 1.4522157308110365e-10, MeanMutualInformation : nan, MeanNoiseToSignalRatio : nan, MeanNominalAttDistinctValues : nan, MeanSkewnessOfNumericAtts : 0.013614947481475233, MeanStdDevOfNumericAtts : 0.9999999995473542, MinAttributeEntropy : nan, MinKurtosisOfNumericAtts : -1.4451227705200003, MinMeansOfNumericAtts : -4.590000000082028e-09, MinMutualInformation : nan, MinNominalAttDistinctValues : nan, MinSkewnessOfNumericAtts : -0.5508008053990625, MinStdDevOfNumericAtts : 0.9999999948249523, MinorityClassPercentage : nan, MinorityClassSize : nan, NaiveBayesAUC : nan, NaiveBayesErrRate : nan, NaiveBayesKappa : nan, NumberOfBinaryFeatures : 0.0, NumberOfClasses : 0.0, NumberOfFeatures : 51.0, NumberOfInstances : 100.0, NumberOfInstancesWithMissingValues : 0.0, NumberOfMissingValues : 0.0, NumberOfNumericFeatures : 51.0, NumberOfSymbolicFeatures : 0.0, PercentageOfBinaryFeatures : 0.0, PercentageOfInstancesWithMissingValues : 0.0, PercentageOfMissingValues : 0.0, PercentageOfNumericFeatures : 100.0, PercentageOfSymbolicFeatures : 0.0, Quartile1AttributeEntropy : nan, Quartile1KurtosisOfNumericAtts : -1.2376474236436446, Quartile1MeansOfNumericAtts : -1.1960000256827642e-09, Quartile1MutualInformation : nan, Quartile1SkewnessOfNumericAtts : -0.07685021807088445, Quartile1StdDevOfNumericAtts : 0.9999999979459647, Quartile2AttributeEntropy : nan, Quartile2KurtosisOfNumericAtts : -1.1771948671208612, Quartile2MeansOfNumericAtts : 1.9000002460245467e-10, Quartile2MutualInformation : nan, Quartile2SkewnessOfNumericAtts : 0.011542347915134551, Quartile2StdDevOfNumericAtts : 0.9999999997567961, Quartile3AttributeEntropy : nan, Quartile3KurtosisOfNumericAtts : -1.048683670883777, Quartile3MeansOfNumericAtts : 1.6000000073457256e-09, Quartile3MutualInformation : nan, Quartile3SkewnessOfNumericAtts : 0.11943274904046973, Quartile3StdDevOfNumericAtts : 1.0000000008933372, REPTreeDepth1AUC : nan, REPTreeDepth1ErrRate : nan, REPTreeDepth1Kappa : nan, REPTreeDepth2AUC : nan, REPTreeDepth2ErrRate : nan, REPTreeDepth2Kappa : nan, REPTreeDepth3AUC : nan, REPTreeDepth3ErrRate : nan, REPTreeDepth3Kappa : nan, RandomTreeDepth1AUC : nan, RandomTreeDepth1ErrRate : nan, RandomTreeDepth1Kappa : nan, RandomTreeDepth2AUC : nan, RandomTreeDepth2ErrRate : nan, RandomTreeDepth2Kappa : nan, RandomTreeDepth3AUC : nan, RandomTreeDepth3ErrRate : nan, RandomTreeDepth3Kappa : nan, StdvNominalAttDistinctValues : nan, kNN1NAUC : nan, kNN1NErrRate : nan, kNN1NKappa : nan,',\n", + " 'status': 'active',\n", + " 'uploader': 2,\n", + " 'version': 1},\n", + " {'NumberOfClasses': 0.0,\n", + " 'NumberOfFeatures': 51.0,\n", + " 'NumberOfInstances': 500.0,\n", + " 'NumberOfInstancesWithMissingValues': 0.0,\n", + " 'NumberOfMissingValues': 0.0,\n", + " 'NumberOfNumericFeatures': 51.0,\n", + " 'NumberOfSymbolicFeatures': 0.0,\n", + " 'Unnamed: 0': 490,\n", + " 'description': '**Author**: \\n**Source**: Unknown - Date unknown \\n**Please cite**: \\n\\nThe Friedman datasets are 80 artificially generated datasets originating from: J.H. Friedman (1999). Stochastic Gradient Boosting\\n\\nThe dataset names are coded as \"fri_colinearintydegree_samplenumber_featurenumber\".\\n\\nFriedman is the one of the most used functions for data generation (Friedman, 1999). Friedman functions include both linear and non-linear relations between input and output, and a normalized noise (e) is added to the output. The Friedman function is as follows: \\n\\ny=10*sin(pi*x1*x2)+20*(x3-0.5)^2=10*X4+5*X5+e\\n\\nIn the original Friedman function, there are 5 features for input. To measure the effects of non-related features, additional features are added to the datasets. These added features are independent from the output. However, to measure the algorithm\\'s robustness to the colinearity, the datasets are generated with 5 different colinearity degrees. The colinearity degrees is the number of features depending on other features. \\n\\nThe generated Friedman dataset\\'s parameters and values are given below: \\nThe number of features: 5 10 25 50 100 (only the first 5 features are related to the output. The rest are completely random)\\nThe number of samples: 100 250 500 1000\\nColinearity degrees: 0 1 2 3 4\\nFor the datasets with colinearity degree 4, the numbers of features are generated as 10, 25, 50 and 100.\\nThe other datasets have 5, 10, 25 and 50 features.\\n\\nAs a result, 80 artificial datasets are generated by (4 different feature number * 4 different sample number * 5 different colinearity degree)\\n\\nThe last attribute in each file is the target.',\n", + " 'did': 637,\n", + " 'features': '0 : [0 - oz1 (numeric)], 1 : [1 - oz2 (numeric)], 2 : [2 - oz3 (numeric)], 3 : [3 - oz4 (numeric)], 4 : [4 - oz5 (numeric)], 5 : [5 - oz6 (numeric)], 6 : [6 - oz7 (numeric)], 7 : [7 - oz8 (numeric)], 8 : [8 - oz9 (numeric)], 9 : [9 - oz10 (numeric)], 10 : [10 - oz11 (numeric)], 11 : [11 - oz12 (numeric)], 12 : [12 - oz13 (numeric)], 13 : [13 - oz14 (numeric)], 14 : [14 - oz15 (numeric)], 15 : [15 - oz16 (numeric)], 16 : [16 - oz17 (numeric)], 17 : [17 - oz18 (numeric)], 18 : [18 - oz19 (numeric)], 19 : [19 - oz20 (numeric)], 20 : [20 - oz21 (numeric)], 21 : [21 - oz22 (numeric)], 22 : [22 - oz23 (numeric)], 23 : [23 - oz24 (numeric)], 24 : [24 - oz25 (numeric)], 25 : [25 - oz26 (numeric)], 26 : [26 - oz27 (numeric)], 27 : [27 - oz28 (numeric)], 28 : [28 - oz29 (numeric)], 29 : [29 - oz30 (numeric)], 30 : [30 - oz31 (numeric)], 31 : [31 - oz32 (numeric)], 32 : [32 - oz33 (numeric)], 33 : [33 - oz34 (numeric)], 34 : [34 - oz35 (numeric)], 35 : [35 - oz36 (numeric)], 36 : [36 - oz37 (numeric)], 37 : [37 - oz38 (numeric)], 38 : [38 - oz39 (numeric)], 39 : [39 - oz40 (numeric)], 40 : [40 - oz41 (numeric)], 41 : [41 - oz42 (numeric)], 42 : [42 - oz43 (numeric)], 43 : [43 - oz44 (numeric)], 44 : [44 - oz45 (numeric)], 45 : [45 - oz46 (numeric)], 46 : [46 - oz47 (numeric)], 47 : [47 - oz48 (numeric)], 48 : [48 - oz49 (numeric)], 49 : [49 - oz50 (numeric)], 50 : [50 - oz51 (numeric)],',\n", + " 'format': 'ARFF',\n", + " 'name': 'fri_c1_500_50',\n", + " 'qualities': 'AutoCorrelation : -0.1029116105935867, CfsSubsetEval_DecisionStumpAUC : nan, CfsSubsetEval_DecisionStumpErrRate : nan, CfsSubsetEval_DecisionStumpKappa : nan, CfsSubsetEval_NaiveBayesAUC : nan, CfsSubsetEval_NaiveBayesErrRate : nan, CfsSubsetEval_NaiveBayesKappa : nan, CfsSubsetEval_kNN1NAUC : nan, CfsSubsetEval_kNN1NErrRate : nan, CfsSubsetEval_kNN1NKappa : nan, ClassEntropy : nan, DecisionStumpAUC : nan, DecisionStumpErrRate : nan, DecisionStumpKappa : nan, Dimensionality : 0.102, EquivalentNumberOfAtts : nan, J48.00001.AUC : nan, J48.00001.ErrRate : nan, J48.00001.Kappa : nan, J48.0001.AUC : nan, J48.0001.ErrRate : nan, J48.0001.Kappa : nan, J48.001.AUC : nan, J48.001.ErrRate : nan, J48.001.Kappa : nan, MajorityClassPercentage : nan, MajorityClassSize : nan, MaxAttributeEntropy : nan, MaxKurtosisOfNumericAtts : -0.3958006181295963, MaxMeansOfNumericAtts : 1.854199979112181e-09, MaxMutualInformation : nan, MaxNominalAttDistinctValues : nan, MaxSkewnessOfNumericAtts : 0.14393312684727844, MaxStdDevOfNumericAtts : 1.0000000025959064, MeanAttributeEntropy : nan, MeanKurtosisOfNumericAtts : -1.1642918641905624, MeanMeansOfNumericAtts : 1.2128730281619083e-10, MeanMutualInformation : nan, MeanNoiseToSignalRatio : nan, MeanNominalAttDistinctValues : nan, MeanSkewnessOfNumericAtts : -0.012760485342081898, MeanStdDevOfNumericAtts : 0.9999999998792359, MinAttributeEntropy : nan, MinKurtosisOfNumericAtts : -1.3384923344675994, MinMeansOfNumericAtts : -1.8019999661333941e-09, MinMutualInformation : nan, MinNominalAttDistinctValues : nan, MinSkewnessOfNumericAtts : -0.281036048339426, MinStdDevOfNumericAtts : 0.9999999978689819, MinorityClassPercentage : nan, MinorityClassSize : nan, NaiveBayesAUC : nan, NaiveBayesErrRate : nan, NaiveBayesKappa : nan, NumberOfBinaryFeatures : 0.0, NumberOfClasses : 0.0, NumberOfFeatures : 51.0, NumberOfInstances : 500.0, NumberOfInstancesWithMissingValues : 0.0, NumberOfMissingValues : 0.0, NumberOfNumericFeatures : 51.0, NumberOfSymbolicFeatures : 0.0, PercentageOfBinaryFeatures : 0.0, PercentageOfInstancesWithMissingValues : 0.0, PercentageOfMissingValues : 0.0, PercentageOfNumericFeatures : 100.0, PercentageOfSymbolicFeatures : 0.0, Quartile1AttributeEntropy : nan, Quartile1KurtosisOfNumericAtts : -1.223622948866781, Quartile1MeansOfNumericAtts : -4.309000054547596e-10, Quartile1MutualInformation : nan, Quartile1SkewnessOfNumericAtts : -0.06723563152489599, Quartile1StdDevOfNumericAtts : 0.9999999991271817, Quartile2AttributeEntropy : nan, Quartile2KurtosisOfNumericAtts : -1.1655706465365527, Quartile2MeansOfNumericAtts : 1.4859997721217156e-10, Quartile2MutualInformation : nan, Quartile2SkewnessOfNumericAtts : -0.013705906353867023, Quartile2StdDevOfNumericAtts : 0.9999999998569478, Quartile3AttributeEntropy : nan, Quartile3KurtosisOfNumericAtts : -1.1477087655562215, Quartile3MeansOfNumericAtts : 7.3686000745532e-10, Quartile3MutualInformation : nan, Quartile3SkewnessOfNumericAtts : 0.04372579533405131, Quartile3StdDevOfNumericAtts : 1.0000000004926681, REPTreeDepth1AUC : nan, REPTreeDepth1ErrRate : nan, REPTreeDepth1Kappa : nan, REPTreeDepth2AUC : nan, REPTreeDepth2ErrRate : nan, REPTreeDepth2Kappa : nan, REPTreeDepth3AUC : nan, REPTreeDepth3ErrRate : nan, REPTreeDepth3Kappa : nan, RandomTreeDepth1AUC : nan, RandomTreeDepth1ErrRate : nan, RandomTreeDepth1Kappa : nan, RandomTreeDepth2AUC : nan, RandomTreeDepth2ErrRate : nan, RandomTreeDepth2Kappa : nan, RandomTreeDepth3AUC : nan, RandomTreeDepth3ErrRate : nan, RandomTreeDepth3Kappa : nan, StdvNominalAttDistinctValues : nan, kNN1NAUC : nan, kNN1NErrRate : nan, kNN1NKappa : nan,',\n", + " 'status': 'active',\n", + " 'uploader': 2,\n", + " 'version': 1},\n", + " {'MajorityClassSize': 5715.0,\n", + " 'MaxNominalAttDistinctValues': 2.0,\n", + " 'MinorityClassSize': 2477.0,\n", + " 'NumberOfClasses': 2.0,\n", + " 'NumberOfFeatures': 22.0,\n", + " 'NumberOfInstances': 8192.0,\n", + " 'NumberOfInstancesWithMissingValues': 0.0,\n", + " 'NumberOfMissingValues': 0.0,\n", + " 'NumberOfNumericFeatures': 21.0,\n", + " 'NumberOfSymbolicFeatures': 1.0,\n", + " 'Unnamed: 0': 610,\n", + " 'description': \"**Author**: \\n**Source**: Unknown - Date unknown \\n**Please cite**: \\n\\nBinarized version of the original data set (see version 1). It converts the numeric target feature to a two-class nominal target feature by computing the mean and classifying all instances with a lower target value as positive ('P') and all others as negative ('N').\",\n", + " 'did': 761,\n", + " 'features': '0 : [0 - lread (numeric)], 1 : [1 - lwrite (numeric)], 2 : [2 - scall (numeric)], 3 : [3 - sread (numeric)], 4 : [4 - swrite (numeric)], 5 : [5 - fork (numeric)], 6 : [6 - exec (numeric)], 7 : [7 - rchar (numeric)], 8 : [8 - wchar (numeric)], 9 : [9 - pgout (numeric)], 10 : [10 - ppgout (numeric)], 11 : [11 - pgfree (numeric)], 12 : [12 - pgscan (numeric)], 13 : [13 - atch (numeric)], 14 : [14 - pgin (numeric)], 15 : [15 - ppgin (numeric)], 16 : [16 - pflt (numeric)], 17 : [17 - vflt (numeric)], 18 : [18 - runqsz (numeric)], 19 : [19 - freemem (numeric)], 20 : [20 - freeswap (numeric)], 21 : [21 - binaryClass (nominal)],',\n", + " 'format': 'ARFF',\n", + " 'name': 'cpu_act',\n", + " 'qualities': 'AutoCorrelation : 0.569893785862532, CfsSubsetEval_DecisionStumpAUC : 0.9063675579107315, CfsSubsetEval_DecisionStumpErrRate : 0.0908203125, CfsSubsetEval_DecisionStumpKappa : 0.7817815135582044, CfsSubsetEval_NaiveBayesAUC : 0.9063675579107315, CfsSubsetEval_NaiveBayesErrRate : 0.0908203125, CfsSubsetEval_NaiveBayesKappa : 0.7817815135582044, CfsSubsetEval_kNN1NAUC : 0.9063675579107315, CfsSubsetEval_kNN1NErrRate : 0.0908203125, CfsSubsetEval_kNN1NKappa : 0.7817815135582044, ClassEntropy : 0.8841664897438648, DecisionStumpAUC : 0.8142696181951823, DecisionStumpErrRate : 0.1429443359375, DecisionStumpKappa : 0.6521448025469263, Dimensionality : 0.002685546875, EquivalentNumberOfAtts : nan, J48.00001.AUC : 0.9007884965126232, J48.00001.ErrRate : 0.087158203125, J48.00001.Kappa : 0.7888672445074003, J48.0001.AUC : 0.9007884965126232, J48.0001.ErrRate : 0.087158203125, J48.0001.Kappa : 0.7888672445074003, J48.001.AUC : 0.9007884965126232, J48.001.ErrRate : 0.087158203125, J48.001.Kappa : 0.7888672445074003, MajorityClassPercentage : 69.76318359375, MajorityClassSize : 5715.0, MaxAttributeEntropy : nan, MaxKurtosisOfNumericAtts : 618.0754029990946, MaxMeansOfNumericAtts : 1328125.9598388672, MaxMutualInformation : nan, MaxNominalAttDistinctValues : 2.0, MaxSkewnessOfNumericAtts : 21.542019683245, MaxStdDevOfNumericAtts : 422019.42695680115, MeanAttributeEntropy : nan, MeanKurtosisOfNumericAtts : 82.69398017053769, MeanMeansOfNumericAtts : 77423.0389258975, MeanMutualInformation : nan, MeanNoiseToSignalRatio : nan, MeanNominalAttDistinctValues : 2.0, MeanSkewnessOfNumericAtts : 5.398356926130622, MeanStdDevOfNumericAtts : 38448.58530864211, MinAttributeEntropy : nan, MinKurtosisOfNumericAtts : 0.9031412644665848, MinMeansOfNumericAtts : 1.1275048828124994, MinMutualInformation : nan, MinNominalAttDistinctValues : 2.0, MinSkewnessOfNumericAtts : -0.7916644438525916, MinStdDevOfNumericAtts : 2.479493426887122, MinorityClassPercentage : 30.23681640625, MinorityClassSize : 2477.0, NaiveBayesAUC : 0.9568507715938583, NaiveBayesErrRate : 0.10498046875, NaiveBayesKappa : 0.747991928724961, NumberOfBinaryFeatures : 1.0, NumberOfClasses : 2.0, NumberOfFeatures : 22.0, NumberOfInstances : 8192.0, NumberOfInstancesWithMissingValues : 0.0, NumberOfMissingValues : 0.0, NumberOfNumericFeatures : 21.0, NumberOfSymbolicFeatures : 1.0, PercentageOfBinaryFeatures : 4.545454545454546, PercentageOfInstancesWithMissingValues : 0.0, PercentageOfMissingValues : 0.0, PercentageOfNumericFeatures : 95.45454545454545, PercentageOfSymbolicFeatures : 4.545454545454546, Quartile1AttributeEntropy : nan, Quartile1KurtosisOfNumericAtts : 4.347867554757658, Quartile1MeansOfNumericAtts : 7.12759460449219, Quartile1MutualInformation : nan, Quartile1SkewnessOfNumericAtts : 2.02862189623975, Quartile1StdDevOfNumericAtts : 14.544784181911057, Quartile2AttributeEntropy : nan, Quartile2KurtosisOfNumericAtts : 22.821806534075247, Quartile2MeansOfNumericAtts : 19.630676269531282, Quartile2MutualInformation : nan, Quartile2SkewnessOfNumericAtts : 4.069237707552649, Quartile2StdDevOfNumericAtts : 71.1413402583838, Quartile3AttributeEntropy : nan, Quartile3KurtosisOfNumericAtts : 69.61173013955492, Quartile3MeansOfNumericAtts : 986.9681396484375, Quartile3MutualInformation : nan, Quartile3SkewnessOfNumericAtts : 5.636440553258243, Quartile3StdDevOfNumericAtts : 916.2987340329522, REPTreeDepth1AUC : 0.9439130463960475, REPTreeDepth1ErrRate : 0.0888671875, REPTreeDepth1Kappa : 0.7850288992712985, REPTreeDepth2AUC : 0.9439130463960475, REPTreeDepth2ErrRate : 0.0888671875, REPTreeDepth2Kappa : 0.7850288992712985, REPTreeDepth3AUC : 0.9439130463960475, REPTreeDepth3ErrRate : 0.0888671875, REPTreeDepth3Kappa : 0.7850288992712985, RandomTreeDepth1AUC : 0.8805606505484755, RandomTreeDepth1ErrRate : 0.1005859375, RandomTreeDepth1Kappa : 0.7614696522066133, RandomTreeDepth2AUC : 0.8805606505484755, RandomTreeDepth2ErrRate : 0.1005859375, RandomTreeDepth2Kappa : 0.7614696522066133, RandomTreeDepth3AUC : 0.8805606505484755, RandomTreeDepth3ErrRate : 0.1005859375, RandomTreeDepth3Kappa : 0.7614696522066133, StdvNominalAttDistinctValues : 0.0, kNN1NAUC : 0.8873110128492718, kNN1NErrRate : 0.0902099609375, kNN1NKappa : 0.7836743818275687,',\n", + " 'status': 'active',\n", + " 'uploader': 2,\n", + " 'version': 3},\n", + " {'MajorityClassSize': 563.0,\n", + " 'MaxNominalAttDistinctValues': 2.0,\n", + " 'MinorityClassSize': 437.0,\n", + " 'NumberOfClasses': 2.0,\n", + " 'NumberOfFeatures': 6.0,\n", + " 'NumberOfInstances': 1000.0,\n", + " 'NumberOfInstancesWithMissingValues': 0.0,\n", + " 'NumberOfMissingValues': 0.0,\n", + " 'NumberOfNumericFeatures': 5.0,\n", + " 'NumberOfSymbolicFeatures': 1.0,\n", + " 'Unnamed: 0': 660,\n", + " 'description': \"**Author**: \\n**Source**: Unknown - Date unknown \\n**Please cite**: \\n\\nBinarized version of the original data set (see version 1). It converts the numeric target feature to a two-class nominal target feature by computing the mean and classifying all instances with a lower target value as positive ('P') and all others as negative ('N').\",\n", + " 'did': 813,\n", + " 'features': '0 : [0 - oz1 (numeric)], 1 : [1 - oz2 (numeric)], 2 : [2 - oz3 (numeric)], 3 : [3 - oz4 (numeric)], 4 : [4 - oz5 (numeric)], 5 : [5 - binaryClass (nominal)],',\n", + " 'format': 'ARFF',\n", + " 'name': 'fri_c3_1000_5',\n", + " 'qualities': 'AutoCorrelation : 0.5085085085085085, CfsSubsetEval_DecisionStumpAUC : 0.8395669651385395, CfsSubsetEval_DecisionStumpErrRate : 0.166, CfsSubsetEval_DecisionStumpKappa : 0.6636775843135982, CfsSubsetEval_NaiveBayesAUC : 0.8395669651385395, CfsSubsetEval_NaiveBayesErrRate : 0.166, CfsSubsetEval_NaiveBayesKappa : 0.6636775843135982, CfsSubsetEval_kNN1NAUC : 0.8395669651385395, CfsSubsetEval_kNN1NErrRate : 0.166, CfsSubsetEval_kNN1NKappa : 0.6636775843135982, ClassEntropy : 0.9885173903891564, DecisionStumpAUC : 0.6816478411257116, DecisionStumpErrRate : 0.307, DecisionStumpKappa : 0.38413763219977204, Dimensionality : 0.006, EquivalentNumberOfAtts : nan, J48.00001.AUC : 0.8689982156720089, J48.00001.ErrRate : 0.141, J48.00001.Kappa : 0.7123458704291555, J48.0001.AUC : 0.8689982156720089, J48.0001.ErrRate : 0.141, J48.0001.Kappa : 0.7123458704291555, J48.001.AUC : 0.8689982156720089, J48.001.ErrRate : 0.141, J48.001.Kappa : 0.7123458704291555, MajorityClassPercentage : 56.3, MajorityClassSize : 563.0, MaxAttributeEntropy : nan, MaxKurtosisOfNumericAtts : 1.153622378636951, MaxMeansOfNumericAtts : 9.000000004810715e-09, MaxMutualInformation : nan, MaxNominalAttDistinctValues : 2.0, MaxSkewnessOfNumericAtts : 1.246941780251248, MaxStdDevOfNumericAtts : 1.0000000357900796, MeanAttributeEntropy : nan, MeanKurtosisOfNumericAtts : -0.39534744957352824, MeanMeansOfNumericAtts : 2.0000000069686498e-09, MeanMutualInformation : nan, MeanNoiseToSignalRatio : nan, MeanNominalAttDistinctValues : 2.0, MeanSkewnessOfNumericAtts : 0.3799051670286494, MeanStdDevOfNumericAtts : 1.0000000300225595, MinAttributeEntropy : nan, MinKurtosisOfNumericAtts : -1.199653149643545, MinMeansOfNumericAtts : -4.0000000260942415e-09, MinMutualInformation : nan, MinNominalAttDistinctValues : 2.0, MinSkewnessOfNumericAtts : -0.047776570555339945, MinStdDevOfNumericAtts : 1.0000000249286416, MinorityClassPercentage : 43.7, MinorityClassSize : 437.0, NaiveBayesAUC : 0.7034560685442078, NaiveBayesErrRate : 0.331, NaiveBayesKappa : 0.3247268305819183, NumberOfBinaryFeatures : 1.0, NumberOfClasses : 2.0, NumberOfFeatures : 6.0, NumberOfInstances : 1000.0, NumberOfInstancesWithMissingValues : 0.0, NumberOfMissingValues : 0.0, NumberOfNumericFeatures : 5.0, NumberOfSymbolicFeatures : 1.0, PercentageOfBinaryFeatures : 16.666666666666664, PercentageOfInstancesWithMissingValues : 0.0, PercentageOfMissingValues : 0.0, PercentageOfNumericFeatures : 83.33333333333334, PercentageOfSymbolicFeatures : 16.666666666666664, Quartile1AttributeEntropy : nan, Quartile1KurtosisOfNumericAtts : -1.1837277136220274, Quartile1MeansOfNumericAtts : -2.4999999939656625e-09, Quartile1MutualInformation : nan, Quartile1SkewnessOfNumericAtts : -0.040956945030882735, Quartile1StdDevOfNumericAtts : 1.0000000252149224, Quartile2AttributeEntropy : nan, Quartile2KurtosisOfNumericAtts : -0.7857435131844928, Quartile2MeansOfNumericAtts : 3.9968028886505634e-17, Quartile2MutualInformation : nan, Quartile2SkewnessOfNumericAtts : 0.041091208226431544, Quartile2StdDevOfNumericAtts : 1.0000000316528863, Quartile3AttributeEntropy : nan, Quartile3KurtosisOfNumericAtts : 0.5882308462804533, Quartile3MeansOfNumericAtts : 7.499999991403272e-09, Quartile3MutualInformation : nan, Quartile3SkewnessOfNumericAtts : 0.9701742584892905, Quartile3StdDevOfNumericAtts : 1.0000000340150332, REPTreeDepth1AUC : 0.8576500522291906, REPTreeDepth1ErrRate : 0.155, REPTreeDepth1Kappa : 0.6850796849984152, REPTreeDepth2AUC : 0.8576500522291906, REPTreeDepth2ErrRate : 0.155, REPTreeDepth2Kappa : 0.6850796849984152, REPTreeDepth3AUC : 0.8576500522291906, REPTreeDepth3ErrRate : 0.155, REPTreeDepth3Kappa : 0.6850796849984152, RandomTreeDepth1AUC : 0.8483483788628262, RandomTreeDepth1ErrRate : 0.15, RandomTreeDepth1Kappa : 0.6956280006006273, RandomTreeDepth2AUC : 0.8483483788628262, RandomTreeDepth2ErrRate : 0.15, RandomTreeDepth2Kappa : 0.6956280006006273, RandomTreeDepth3AUC : 0.8483483788628262, RandomTreeDepth3ErrRate : 0.15, RandomTreeDepth3Kappa : 0.6956280006006273, StdvNominalAttDistinctValues : 0.0, kNN1NAUC : 0.8667749186078177, kNN1NErrRate : 0.133, kNN1NKappa : 0.730743068152371,',\n", + " 'status': 'active',\n", + " 'uploader': 2,\n", + " 'version': 2}],\n", + " 'documents': None,\n", + " 'uris': None,\n", + " 'data': None}" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "collec.get(ids = ids, include=['metadatas'])" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.0" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}