diff --git a/Feature Selection using Genetic ALgorithm/Feature_Selection_for_Machine_Learning_with_Genetic_Algorithm.ipynb b/Feature Selection using Genetic ALgorithm/Feature_Selection_for_Machine_Learning_with_Genetic_Algorithm.ipynb
new file mode 100644
index 0000000000..33bea66749
--- /dev/null
+++ b/Feature Selection using Genetic ALgorithm/Feature_Selection_for_Machine_Learning_with_Genetic_Algorithm.ipynb
@@ -0,0 +1,696 @@
+{
+ "nbformat": 4,
+ "nbformat_minor": 0,
+ "metadata": {
+ "colab": {
+ "provenance": []
+ },
+ "kernelspec": {
+ "name": "python3",
+ "display_name": "Python 3"
+ },
+ "language_info": {
+ "name": "python"
+ }
+ },
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {
+ "id": "9f4c0HZLL8lT",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 255
+ },
+ "outputId": "53a5719a-25d2-40ea-aa2a-a549fd4b7d08"
+ },
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ " Feature_1 Feature_2 Feature_3 Feature_4 Feature_5 Feature_6 \\\n",
+ "0 1.470848 -0.360450 -0.591602 -0.728228 0.941690 1.065964 \n",
+ "1 4.513369 -2.227103 -1.140747 2.018263 -2.238358 -0.497370 \n",
+ "2 -2.355643 2.218601 -1.603269 0.873394 0.401483 0.717264 \n",
+ "3 -1.596198 -0.857427 1.772434 -0.639361 1.419409 -0.438525 \n",
+ "4 2.840049 -2.489600 -0.844902 -1.594362 -4.688517 0.459637 \n",
+ "\n",
+ " Feature_7 Feature_8 Feature_9 Feature_10 ... Feature_12 Feature_13 \\\n",
+ "0 0.017832 -0.596184 1.840712 -1.497093 ... -0.603968 2.899256 \n",
+ "1 0.714550 0.938883 -2.395169 0.159837 ... 1.461499 3.954171 \n",
+ "2 -0.859399 -1.042190 -2.175965 0.980231 ... 0.544434 -2.466258 \n",
+ "3 0.281949 2.345145 1.006230 0.389135 ... -1.025051 -2.422975 \n",
+ "4 0.913607 -1.143505 1.263937 -2.040928 ... 4.176424 1.341742 \n",
+ "\n",
+ " Feature_14 Feature_15 Feature_16 Feature_17 Feature_18 Feature_19 \\\n",
+ "0 0.037567 -1.249523 0.257963 0.416628 1.408208 -1.838041 \n",
+ "1 0.309054 0.538184 -7.157865 -4.532216 -0.081800 -9.325362 \n",
+ "2 -0.470256 0.073018 -2.203531 -2.299263 -1.742761 -0.271579 \n",
+ "3 1.579807 -0.300713 4.267120 2.893775 1.236697 6.034785 \n",
+ "4 0.133565 1.743819 1.531188 2.269808 0.053489 -3.151109 \n",
+ "\n",
+ " Feature_20 Target \n",
+ "0 -0.833142 1 \n",
+ "1 0.574386 1 \n",
+ "2 -0.359285 0 \n",
+ "3 -0.045711 0 \n",
+ "4 1.603702 0 \n",
+ "\n",
+ "[5 rows x 21 columns]"
+ ],
+ "text/html": [
+ "\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Feature_1 | \n",
+ " Feature_2 | \n",
+ " Feature_3 | \n",
+ " Feature_4 | \n",
+ " Feature_5 | \n",
+ " Feature_6 | \n",
+ " Feature_7 | \n",
+ " Feature_8 | \n",
+ " Feature_9 | \n",
+ " Feature_10 | \n",
+ " ... | \n",
+ " Feature_12 | \n",
+ " Feature_13 | \n",
+ " Feature_14 | \n",
+ " Feature_15 | \n",
+ " Feature_16 | \n",
+ " Feature_17 | \n",
+ " Feature_18 | \n",
+ " Feature_19 | \n",
+ " Feature_20 | \n",
+ " Target | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 1.470848 | \n",
+ " -0.360450 | \n",
+ " -0.591602 | \n",
+ " -0.728228 | \n",
+ " 0.941690 | \n",
+ " 1.065964 | \n",
+ " 0.017832 | \n",
+ " -0.596184 | \n",
+ " 1.840712 | \n",
+ " -1.497093 | \n",
+ " ... | \n",
+ " -0.603968 | \n",
+ " 2.899256 | \n",
+ " 0.037567 | \n",
+ " -1.249523 | \n",
+ " 0.257963 | \n",
+ " 0.416628 | \n",
+ " 1.408208 | \n",
+ " -1.838041 | \n",
+ " -0.833142 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 4.513369 | \n",
+ " -2.227103 | \n",
+ " -1.140747 | \n",
+ " 2.018263 | \n",
+ " -2.238358 | \n",
+ " -0.497370 | \n",
+ " 0.714550 | \n",
+ " 0.938883 | \n",
+ " -2.395169 | \n",
+ " 0.159837 | \n",
+ " ... | \n",
+ " 1.461499 | \n",
+ " 3.954171 | \n",
+ " 0.309054 | \n",
+ " 0.538184 | \n",
+ " -7.157865 | \n",
+ " -4.532216 | \n",
+ " -0.081800 | \n",
+ " -9.325362 | \n",
+ " 0.574386 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " -2.355643 | \n",
+ " 2.218601 | \n",
+ " -1.603269 | \n",
+ " 0.873394 | \n",
+ " 0.401483 | \n",
+ " 0.717264 | \n",
+ " -0.859399 | \n",
+ " -1.042190 | \n",
+ " -2.175965 | \n",
+ " 0.980231 | \n",
+ " ... | \n",
+ " 0.544434 | \n",
+ " -2.466258 | \n",
+ " -0.470256 | \n",
+ " 0.073018 | \n",
+ " -2.203531 | \n",
+ " -2.299263 | \n",
+ " -1.742761 | \n",
+ " -0.271579 | \n",
+ " -0.359285 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " -1.596198 | \n",
+ " -0.857427 | \n",
+ " 1.772434 | \n",
+ " -0.639361 | \n",
+ " 1.419409 | \n",
+ " -0.438525 | \n",
+ " 0.281949 | \n",
+ " 2.345145 | \n",
+ " 1.006230 | \n",
+ " 0.389135 | \n",
+ " ... | \n",
+ " -1.025051 | \n",
+ " -2.422975 | \n",
+ " 1.579807 | \n",
+ " -0.300713 | \n",
+ " 4.267120 | \n",
+ " 2.893775 | \n",
+ " 1.236697 | \n",
+ " 6.034785 | \n",
+ " -0.045711 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 2.840049 | \n",
+ " -2.489600 | \n",
+ " -0.844902 | \n",
+ " -1.594362 | \n",
+ " -4.688517 | \n",
+ " 0.459637 | \n",
+ " 0.913607 | \n",
+ " -1.143505 | \n",
+ " 1.263937 | \n",
+ " -2.040928 | \n",
+ " ... | \n",
+ " 4.176424 | \n",
+ " 1.341742 | \n",
+ " 0.133565 | \n",
+ " 1.743819 | \n",
+ " 1.531188 | \n",
+ " 2.269808 | \n",
+ " 0.053489 | \n",
+ " -3.151109 | \n",
+ " 1.603702 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
5 rows × 21 columns
\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ],
+ "application/vnd.google.colaboratory.intrinsic+json": {
+ "type": "dataframe",
+ "variable_name": "df"
+ }
+ },
+ "metadata": {},
+ "execution_count": 4
+ }
+ ],
+ "source": [
+ "import pandas as pd\n",
+ "\n",
+ "# Load the dataset from a CSV file given by a relative path (resolved against the working directory).\n",
+ "# The stored preview below shows 21 columns: Feature_1..Feature_20 plus a `Target` column.\n",
+ "df = pd.read_csv('feature_selection.csv')\n",
+ "# Last expression of the cell, so the first five rows are rendered as the cell output.\n",
+ "df.head()\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "Define Helper Functions\n"
+ ],
+ "metadata": {
+ "id": "hA2TeA5_Kw2Y"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "from sklearn.model_selection import train_test_split\n",
+ "from sklearn.ensemble import RandomForestClassifier\n",
+ "from sklearn.metrics import accuracy_score\n",
+ "\n",
+ "# Fitness function: hold-out accuracy of a model trained on the selected columns\n",
+ "def fitness_function(individual, X, y):\n",
+ "    \"\"\"Score one feature mask by the accuracy of a random forest on a held-out split.\n",
+ "\n",
+ "    Args:\n",
+ "        individual: sequence of 0/1 genes, paired positionally with ``X.columns``.\n",
+ "        X: DataFrame holding every candidate feature column.\n",
+ "        y: labels aligned with the rows of ``X``.\n",
+ "\n",
+ "    Returns:\n",
+ "        0 when the mask selects no column; otherwise the accuracy on the 20% test\n",
+ "        split (the split and the forest both use ``random_state=42``).\n",
+ "    \"\"\"\n",
+ "    chosen_columns = []\n",
+ "    for column, gene in zip(X.columns, individual):\n",
+ "        if gene == 1:\n",
+ "            chosen_columns.append(column)\n",
+ "\n",
+ "    # Guard clause: a model cannot be trained on zero columns.\n",
+ "    if not chosen_columns:\n",
+ "        return 0\n",
+ "\n",
+ "    features_train, features_test, labels_train, labels_test = train_test_split(\n",
+ "        X[chosen_columns], y, test_size=0.2, random_state=42\n",
+ "    )\n",
+ "    classifier = RandomForestClassifier(random_state=42)\n",
+ "    classifier.fit(features_train, labels_train)\n",
+ "    return accuracy_score(labels_test, classifier.predict(features_test))\n"
+ ],
+ "metadata": {
+ "id": "98bo4d4HK0Jw"
+ },
+ "execution_count": 5,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "Initialize the Population"
+ ],
+ "metadata": {
+ "id": "E5ine5SzK_5f"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "import numpy as np\n",
+ "\n",
+ "def initialize_population(pop_size, num_features):\n",
+ "    \"\"\"Create a random binary population.\n",
+ "\n",
+ "    Args:\n",
+ "        pop_size: number of individuals (rows).\n",
+ "        num_features: number of genes per individual (columns).\n",
+ "\n",
+ "    Returns:\n",
+ "        Integer array of shape ``(pop_size, num_features)`` whose entries are 0 or 1,\n",
+ "        drawn from NumPy's global random state.\n",
+ "    \"\"\"\n",
+ "    shape = (pop_size, num_features)\n",
+ "    return np.random.randint(0, 2, size=shape)\n",
+ "\n",
+ "# Number of individuals (candidate feature masks) per generation.\n",
+ "pop_size = 50\n",
+ "num_features = df.shape[1] - 1 # Exclude the target column\n",
+ "# Note: genetic_algorithm() creates its own initial population from pop_size, and this\n",
+ "# variable is reassigned from its return value in a later cell.\n",
+ "population = initialize_population(pop_size, num_features)\n"
+ ],
+ "metadata": {
+ "id": "dz67xrQfK_mP"
+ },
+ "execution_count": 6,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "Define Genetic Algorithm Operations"
+ ],
+ "metadata": {
+ "id": "DA63bSjQLEO-"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Selection: keep the best-performing individuals as parents\n",
+ "def selection(population, fitness_scores, num_parents):\n",
+ "    \"\"\"Return the fittest individuals, best first, without modifying the inputs.\n",
+ "\n",
+ "    Args:\n",
+ "        population: 2-D array with one individual per row.\n",
+ "        fitness_scores: 1-D sequence holding one score per row of ``population``.\n",
+ "            It is only read; the caller's array keeps its values.\n",
+ "        num_parents: number of individuals to keep.\n",
+ "\n",
+ "    Returns:\n",
+ "        float64 array with the selected rows in descending order of fitness. Ties\n",
+ "        keep the lower row index first. At most ``len(fitness_scores)`` rows are\n",
+ "        returned.\n",
+ "    \"\"\"\n",
+ "    scores = np.asarray(fitness_scores, dtype=float)\n",
+ "    # A stable sort on the negated scores ranks by descending fitness and breaks\n",
+ "    # ties by original position, i.e. the first maximum wins.\n",
+ "    ranking = np.argsort(-scores, kind='stable')[:num_parents]\n",
+ "    # Fancy indexing copies the rows; the cast keeps the float64 result callers already receive.\n",
+ "    return population[ranking, :].astype(float)\n",
+ "\n",
+ "# Crossover: Combine pairs of parents to create offspring\n",
+ "def crossover(parents, offspring_size):\n",
+ "    \"\"\"Single-point crossover at the midpoint of the chromosome.\n",
+ "\n",
+ "    Child ``k`` takes its first half from parent ``k % n`` and its second half from\n",
+ "    parent ``(k + 1) % n``, where ``n`` is the number of parents.\n",
+ "\n",
+ "    Args:\n",
+ "        parents: 2-D array with one parent per row.\n",
+ "        offspring_size: ``(number_of_children, number_of_genes)``.\n",
+ "\n",
+ "    Returns:\n",
+ "        float64 array of shape ``offspring_size``.\n",
+ "    \"\"\"\n",
+ "    num_offspring, num_genes = offspring_size\n",
+ "    offspring = np.empty(offspring_size)\n",
+ "    # Plain integer division: casting to np.uint8 wrapped around once the midpoint\n",
+ "    # exceeded 255, i.e. for chromosomes with 512 or more genes.\n",
+ "    crossover_point = num_genes // 2\n",
+ "    num_available = parents.shape[0]\n",
+ "\n",
+ "    for k in range(num_offspring):\n",
+ "        first_parent = parents[k % num_available]\n",
+ "        second_parent = parents[(k + 1) % num_available]\n",
+ "        offspring[k, :crossover_point] = first_parent[:crossover_point]\n",
+ "        offspring[k, crossover_point:] = second_parent[crossover_point:]\n",
+ "    return offspring\n",
+ "\n",
+ "# Mutation: Introduce random changes to some individuals\n",
+ "def mutation(offspring, mutation_rate=0.01):\n",
+ "    \"\"\"Flip each gene independently with probability ``mutation_rate``.\n",
+ "\n",
+ "    Args:\n",
+ "        offspring: 2-D array of 0/1 genes; it is modified in place.\n",
+ "        mutation_rate: per-gene flip probability.\n",
+ "\n",
+ "    Returns:\n",
+ "        The same ``offspring`` array that was passed in.\n",
+ "    \"\"\"\n",
+ "    # One uniform draw per gene, taken in row-major order from NumPy's global random state.\n",
+ "    flip_mask = np.random.rand(*offspring.shape) < mutation_rate\n",
+ "    offspring[flip_mask] = 1 - offspring[flip_mask]\n",
+ "    return offspring\n"
+ ],
+ "metadata": {
+ "id": "z7JxcxlWK-aQ"
+ },
+ "execution_count": 7,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "Run the Genetic Algorithm"
+ ],
+ "metadata": {
+ "id": "oFKyWCb3LLCn"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "def genetic_algorithm(X, y, num_generations, pop_size, num_parents, mutation_rate):\n",
+ "    \"\"\"Evolve binary feature masks and return the final population with its scores.\n",
+ "\n",
+ "    Each generation keeps the ``num_parents`` fittest individuals unchanged and fills\n",
+ "    the remaining slots with mutated crossover offspring of those parents.\n",
+ "\n",
+ "    Args:\n",
+ "        X: DataFrame of candidate features.\n",
+ "        y: labels aligned with the rows of ``X``.\n",
+ "        num_generations: number of selection/crossover/mutation rounds.\n",
+ "        pop_size: number of individuals in the population.\n",
+ "        num_parents: number of individuals carried over as parents each round.\n",
+ "        mutation_rate: per-gene flip probability passed to ``mutation``.\n",
+ "\n",
+ "    Returns:\n",
+ "        ``(population, fitness_scores)`` where ``fitness_scores[i]`` is the fitness of\n",
+ "        ``population[i]``.\n",
+ "    \"\"\"\n",
+ "    def evaluate(candidates):\n",
+ "        # One fitness value per row of `candidates`.\n",
+ "        return np.array([fitness_function(individual, X, y) for individual in candidates])\n",
+ "\n",
+ "    num_features = X.shape[1]\n",
+ "    population = initialize_population(pop_size, num_features)\n",
+ "    # Scoring before the loop also makes num_generations == 0 return a valid result.\n",
+ "    fitness_scores = evaluate(population)\n",
+ "\n",
+ "    for generation in range(num_generations):\n",
+ "        # Report before selecting so the value is the true maximum of this generation.\n",
+ "        best_fitness = np.max(fitness_scores)\n",
+ "        print(f\"Generation {generation}: Best Fitness = {best_fitness}\")\n",
+ "\n",
+ "        # Hand selection() a copy so the scores kept here cannot be altered by it.\n",
+ "        parents = selection(population, fitness_scores.copy(), num_parents)\n",
+ "        offspring_size = (pop_size - parents.shape[0], num_features)\n",
+ "        offspring = crossover(parents, offspring_size)\n",
+ "        offspring = mutation(offspring, mutation_rate)\n",
+ "        population[0:parents.shape[0], :] = parents\n",
+ "        population[parents.shape[0]:, :] = offspring\n",
+ "\n",
+ "        # Re-score so the scores always line up row-for-row with the population.\n",
+ "        fitness_scores = evaluate(population)\n",
+ "    return population, fitness_scores\n",
+ "\n",
+ "# Split the frame into the feature matrix and the label column.\n",
+ "X = df.drop(columns='Target')\n",
+ "y = df['Target']\n",
+ "\n",
+ "# GA settings; pop_size is defined in the population-initialisation cell above.\n",
+ "num_generations = 30\n",
+ "num_parents = 10\n",
+ "# fitness_function trains a RandomForest for every individual in every generation,\n",
+ "# so this is the expensive cell of the notebook.\n",
+ "population, fitness_scores = genetic_algorithm(X, y, num_generations, pop_size, num_parents, mutation_rate=0.01)\n"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "1YnloIwTLPI1",
+ "outputId": "8d80a4f3-4c1f-49d5-cc8c-84552eedfe5d"
+ },
+ "execution_count": 10,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Generation 0: Best Fitness = 0.88\n",
+ "Generation 1: Best Fitness = 0.91\n",
+ "Generation 2: Best Fitness = 0.92\n",
+ "Generation 3: Best Fitness = 0.93\n",
+ "Generation 4: Best Fitness = 0.93\n",
+ "Generation 5: Best Fitness = 0.93\n",
+ "Generation 6: Best Fitness = 0.935\n",
+ "Generation 7: Best Fitness = 0.935\n",
+ "Generation 8: Best Fitness = 0.935\n",
+ "Generation 9: Best Fitness = 0.94\n",
+ "Generation 10: Best Fitness = 0.94\n",
+ "Generation 11: Best Fitness = 0.94\n",
+ "Generation 12: Best Fitness = 0.94\n",
+ "Generation 13: Best Fitness = 0.94\n",
+ "Generation 14: Best Fitness = 0.94\n",
+ "Generation 15: Best Fitness = 0.94\n",
+ "Generation 16: Best Fitness = 0.94\n",
+ "Generation 17: Best Fitness = 0.94\n",
+ "Generation 18: Best Fitness = 0.94\n",
+ "Generation 19: Best Fitness = 0.94\n",
+ "Generation 20: Best Fitness = 0.94\n",
+ "Generation 21: Best Fitness = 0.94\n",
+ "Generation 22: Best Fitness = 0.94\n",
+ "Generation 23: Best Fitness = 0.94\n",
+ "Generation 24: Best Fitness = 0.94\n",
+ "Generation 25: Best Fitness = 0.94\n",
+ "Generation 26: Best Fitness = 0.94\n",
+ "Generation 27: Best Fitness = 0.94\n",
+ "Generation 28: Best Fitness = 0.94\n",
+ "Generation 29: Best Fitness = 0.94\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "Evaluate the best feature set"
+ ],
+ "metadata": {
+ "id": "C1XI1XXhLRA1"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Score the population in this cell rather than relying on the returned `fitness_scores`,\n",
+ "# so that `best_idx` is guaranteed to index the rows of `population` as it is now.\n",
+ "final_scores = np.array([fitness_function(individual, X, y) for individual in population])\n",
+ "best_idx = np.argmax(final_scores)\n",
+ "best_individual = population[best_idx, :]\n",
+ "selected_features = [feature for feature, include in zip(X.columns, best_individual) if include == 1]\n",
+ "\n",
+ "print(f\"Selected Features: {selected_features}\")\n",
+ "\n",
+ "# Evaluate performance using the best feature subset.\n",
+ "# Same split and model settings as fitness_function, so the accuracy printed below\n",
+ "# equals final_scores[best_idx].\n",
+ "X_selected = X[selected_features]\n",
+ "X_train, X_test, y_train, y_test = train_test_split(X_selected, y, test_size=0.2, random_state=42)\n",
+ "model = RandomForestClassifier(random_state=42)\n",
+ "model.fit(X_train, y_train)\n",
+ "y_pred = model.predict(X_test)\n",
+ "print(f\"Accuracy with selected features: {accuracy_score(y_test, y_pred)}\")\n"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "KXQRb3C5LU9q",
+ "outputId": "88e484bb-2c30-46f6-837b-0d87dca90ff2"
+ },
+ "execution_count": 11,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Selected Features: ['Feature_1', 'Feature_2', 'Feature_3', 'Feature_4', 'Feature_6', 'Feature_7', 'Feature_8', 'Feature_9', 'Feature_11', 'Feature_13', 'Feature_15', 'Feature_17', 'Feature_19', 'Feature_20']\n",
+ "Accuracy with selected features: 0.91\n"
+ ]
+ }
+ ]
+ }
+ ]
+}
\ No newline at end of file
diff --git a/Feature Selection using Genetic ALgorithm/readme.md b/Feature Selection using Genetic ALgorithm/readme.md
new file mode 100644
index 0000000000..e894b581eb
--- /dev/null
+++ b/Feature Selection using Genetic ALgorithm/readme.md
@@ -0,0 +1,26 @@
+## Introduction
+Feature selection is a critical step in machine learning that involves selecting a subset of relevant features for model building. This project demonstrates how to use Genetic Algorithms (GA) to perform feature selection, optimizing the performance of machine learning models.
+
+## Project Overview
+The primary objective of this project is to leverage Genetic Algorithms to select the best features for a given machine learning task. The project involves:
+
+1. Implementing a Genetic Algorithm for feature selection.
+2. Comparing the performance of models trained with all features versus the selected features.
+3. Analyzing the results to determine the effectiveness of the Genetic Algorithm in feature selection.
+
+## Methodology
+The Genetic Algorithm (GA) follows these steps:
+
+1. Initialization: Generate an initial population of feature subsets.
+2. Selection: Evaluate the fitness of each subset using a predefined fitness function (e.g., model accuracy) and keep the fittest subsets as parents for the next generation.
+3. Crossover: Combine pairs of feature subsets to produce new offspring.
+4. Mutation: Introduce random changes to feature subsets to maintain genetic diversity.
+5. Replacement: Replace less fit subsets with new offspring.
+6. Termination: Stop the algorithm after a set number of generations or if convergence criteria are met.
+
+## Usage
+1. Prepare your dataset as a CSV file (the notebook reads `feature_selection.csv` and expects the label in a `Target` column).
+2. Load the dataset and preprocess it if necessary.
+3. Configure the Genetic Algorithm parameters.
+4. Run the Genetic Algorithm to perform feature selection.
+5. Evaluate the selected features using a machine learning model.
\ No newline at end of file
diff --git a/Feature Selection using Genetic ALgorithm/requirement.txt b/Feature Selection using Genetic ALgorithm/requirement.txt
new file mode 100644
index 0000000000..36dd3e911d
--- /dev/null
+++ b/Feature Selection using Genetic ALgorithm/requirement.txt
@@ -0,0 +1,9 @@
+## Requirements
+Python 3.x
+NumPy
+pandas
+scikit-learn
+OpenAI Gym
+Stable Baselines3
+TensorFlow/Keras
+Matplotlib
+Jupyter Notebook (optional, for interactive exploration)
\ No newline at end of file