diff --git a/Dockerfile b/Dockerfile index d9dd6d2..c709802 100644 --- a/Dockerfile +++ b/Dockerfile @@ -3,16 +3,17 @@ FROM quay.io/jupyter/minimal-notebook:notebook-7.0.6 # install necessary packages for analysis RUN conda install -y \ - python=3.11.6 \ - altair=5.1.2 \ - pandas=2.1.2 \ - ipykernel=6.26.0 \ - scikit-learn=1.3.2 \ - requests=2.31.0 \ - notebook=7.0.6 \ - pytest=7.4.3 \ - responses=0.24.1 \ + python=3.11.7 \ + altair=5.4.1 \ + pandas=1.5.3 \ + ipykernel=6.29.5 \ + scikit-learn=1.5.2 \ + requests=2.32.3 \ + notebook=7.0.8 \ + pytest=8.3.3 \ + responses=0.25.3 \ click=8.1.7 \ - vl-convert-python=1.1.0 \ - jupyter-book=0.15.1 \ - make + vl-convert-python=1.7.0 \ + jupyter-book=1.0.3 \ + make +RUN pip install great-expectations==1.1.3 pandera==0.20.4 diff --git a/data/processed/data_config.csv b/data/processed/data_config.csv new file mode 100644 index 0000000..c00e11b --- /dev/null +++ b/data/processed/data_config.csv @@ -0,0 +1,32 @@ +column,type,min,max,category,max_nullable +diagnosis,str,,,"Malignant,Benign",0 +mean_radius,float,6,40,,0.1 +mean_texture,float,9,50,,0.1 +mean_perimeter,float,40,260,,0.1 +mean_area,float,140,4300,,0.1 +mean_smoothness,float,0,1,,0.1 +mean_compactness,float,0,2,,0.1 +mean_concavity,float,0,2,,0.1 +mean_concave_points,float,0,1,,0.1 +mean_symmetry,float,0,1,,0.1 +mean_fractal_dimension,float,0,1,,0.1 +se_radius,float,0,3,,0.1 +se_texture,float,0,5,,0.1 +se_perimeter,float,0,22,,0.1 +se_area,float,6,550,,0.1 +se_smoothness,float,0,1,,0.1 +se_compactness,float,0,1,,0.1 +se_concavity,float,0,1,,0.1 +se_concave_points,float,0,1,,0.1 +se_symmetry,float,0,1,,0.1 +se_fractal_dimension,float,0,1,,0.1 +max_radius,float,6,40,,0.1 +max_texture,float,9,50,,0.1 +max_perimeter,float,40,260,,0.1 +max_area,float,140,4300,,0.1 +max_smoothness,float,0,1,,0.1 +max_compactness,float,0,2,,0.1 +max_concavity,float,0,2,,0.1 +max_concave_points,float,0,1,,0.1 +max_symmetry,float,0,1,,0.1 +max_fractal_dimension,float,0,1,,0.1 \ No newline at end of file diff --git a/scripts/clean_validate.py 
b/scripts/clean_validate.py new file mode 100644 index 0000000..5964079 --- /dev/null +++ b/scripts/clean_validate.py @@ -0,0 +1,44 @@ +# clean_validate.py +# author: Weilin Han +# date: 2024-10-20 + +import click +import os +import sys +import pandas as pd +import pandera as pa +sys.path.append(os.path.join(os.path.dirname(__file__), '..')) +from src.clean_data import extract_column_name, read_data, clean_data, write_data +from src.validate_data import build_schema_from_csv, validate_data + +@click.command() +@click.option('--raw-data-file', type=str, help="Path to raw data file") +@click.option('--name-file', type=str, help="Path to names file") +@click.option('--data-config-file', type=str, help="Path to data configuration file") +@click.option('--write-to', type=str, help="Path to directory where cleaned data will be written to") +@click.option('--written-file-name', type=str, help="The name of the file will be written") + +def main(raw_data_file, name_file, data_config_file, write_to, written_file_name): + """Clean raw data and validate it.""" + # Extract column names from .names file + colnames = extract_column_name(name_file) + + # Read raw data + imported_data = read_data(raw_data_file, colnames) + + # Removing id column and relabel diagnosis column + cleaned_data = clean_data(imported_data) + + + # Create schema + config_df = pd.read_csv(data_config_file) + + schema=build_schema_from_csv(data_config=config_df, expected_columns=colnames[1:]) #removing id colnames list + # Validate cleaned data + validate_data(schema=schema, dataframe=cleaned_data) + + # Write data to specified directory + write_data(cleaned_data, write_to, written_file_name) + +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/src/clean_data.py b/src/clean_data.py new file mode 100644 index 0000000..1120dec --- /dev/null +++ b/src/clean_data.py @@ -0,0 +1,104 @@ +# clean_data.py +# author: Weilin Han +# date: 2024-10-15 + +import pandas as pd +import re 
+import os + + +def extract_column_name(raw_name_file): + """Extract and clean column names from .names file.""" + + # Ensure the raw name file exists, if not raise error + if not os.path.exists(raw_name_file): + raise FileNotFoundError(f"The raw_name file does not exist.") + + # Extracting column names from downloaded raw file + text_lines = [] + with open(raw_name_file, 'r') as file: + for line in file: + line = line.strip() + if not line.startswith('#') and line: # Skip comment lines and blank lines + text_lines.append(line) + start = text_lines.index('7. Attribute information') + end = text_lines.index('8. Missing attribute values: none') + text_lines = text_lines[start:end] + + pattern = re.compile(r'^[1-9a-z]\)\s*') + text_lines = [item for item in text_lines if pattern.match(item)] + text_lines = [pattern.sub('', item) for item in text_lines] + pattern = re.compile(r'\(.*?\)') + text_lines = [re.sub(r"\s+", "_", pattern.sub('', item).strip()) for item in text_lines] + + statistics = ['mean','se','max'] + #se is standard error, and max is the worst or largest (mean of three largest values) + + # please refer to original file for explanation of features + colnames = text_lines[0:2] + for stat in statistics: + for feature in text_lines[2:]: + colnames.append(stat+'_'+feature) + colnames = [col.lower() for col in colnames] + + return colnames + +def read_data(raw_data, col_name): + """Read data from .data or .csv file.""" + + # Ensure the raw data file exists, if not raise error + if not os.path.exists(raw_data): + raise FileNotFoundError(f"The raw_data file does not exist.") + + # Ensure the col_name is a list, if not raise error + if not isinstance(col_name, list): + raise TypeError("col_name must be a list.") + + # Ensure the list has 32 items, if not raise error + if len(col_name) != 32: + raise ValueError("col_name must contain exactly 32 items.") + + # Ensure the list only contains strings, if not raise error + if not all(isinstance(item, str) for item in col_name): + raise 
ValueError("col_name must only contain strings.") + + imported_data = pd.read_csv(raw_data, names=col_name, header=None) + return imported_data + +def clean_data(imported_data, drop_columns=['id'], relabel={'M' : 'Malignant','B' : 'Benign'}): + """Clean imported data""" + # Ensure the imported_data is a dataframe + if not isinstance(imported_data, pd.DataFrame): + raise TypeError("imported_data must be a data frame.") + + # Ensure the drop_columns is a list + if not isinstance(drop_columns, list): + raise TypeError("drop_columns must be a list.") + + # Ensure the relabel is a dictionary + if not isinstance(relabel, dict): + raise TypeError("relabel must be a dictionary") + + cleaned_data = imported_data.drop(columns=drop_columns) + cleaned_data['diagnosis'] = cleaned_data['diagnosis'].replace(relabel) + return cleaned_data + +def write_data(dataframe, data_to, name_of_file): + """Write data to directory""" + # Ensure the data_frame is a dataframe, if not raise an error + if not isinstance(dataframe, pd.DataFrame): + raise TypeError("dataframe must be a pandas data frame.") + + # Ensure directory path exists, if not raise an error + if not os.path.exists(data_to): + raise FileNotFoundError('The directory provided does not exist.') + + # Ensure the directory path provided is a directory, if not raise an error + if not os.path.isdir(data_to): + raise NotADirectoryError('The directory path provided is not a directory, it is an existing file path. 
Please provide a path to a new, or existing directory.') + + # Ensure the name of file is string, if not raise an error + if not isinstance(name_of_file, str): + raise TypeError("name_of_file must be string.") + + dataframe.to_csv(os.path.join(data_to, name_of_file), index=False) \ No newline at end of file diff --git a/src/validate_data.py b/src/validate_data.py new file mode 100644 index 0000000..b440eef --- /dev/null +++ b/src/validate_data.py @@ -0,0 +1,98 @@ +# validate_data.py +# author: Weilin Han +# date: 2024-10-03 + +import pandas as pd +import pandera as pa + +# Function to build schema from the config file +def build_schema_from_csv(data_config, expected_columns): + """Building schema for validation""" + + # Ensure the data_config is a pandas dataframe + if not isinstance(data_config, pd.DataFrame): + raise TypeError("data_config must be a pandas dataframe.") + + # Ensure the data_config has following columns: column,type,max,min,category + required_columns = ['column', 'type', 'min', 'max','category', 'max_nullable'] + missing_columns = [col for col in required_columns if col not in data_config.columns] + if missing_columns: + raise ValueError(f"The data_config must have following columns: 'column', 'type', 'min', 'max', 'category', 'max_nullable'.") + + # Ensure the values of 'column' match the column names extracted from name file + if expected_columns is not None: + actual_columns = data_config['column'].str.strip("'").tolist() # Clean up any extra quotation marks in 'column' + if actual_columns != expected_columns: + raise ValueError("Column names in the config file do not match the expected columns.") + + + schema_dict = {} + + # Loop through each row in the config DataFrame + for _, row in data_config.iterrows(): + column_name = row['column'].strip() # Removing potential extra spaces + column_type = row['type'].strip() # Strip any spaces + min_value = row['min'] if pd.notna(row['min']) else None + max_value = row['max'] if pd.notna(row['max']) 
else None + category_in = row['category'] if pd.notna(row['category']) else None + max_nullable = row['max_nullable'] if pd.notna(row['max_nullable']) else None + + # Define the correct Pandera data type + if column_type == 'int': + dtype = pa.Int + elif column_type == 'float': + dtype = pa.Float + elif column_type == 'str': + dtype = pa.String + else: + raise ValueError(f"Unsupported column type: {column_type}") + + # Create value range validation checks + value_range_checks = [] + if min_value is not None: + value_range_checks.append(pa.Check.greater_than_or_equal_to(float(min_value), + error=f'Value is smaller than {min_value}')) + if max_value is not None: + value_range_checks.append(pa.Check.less_than_or_equal_to(float(max_value), + error=f'Value is larger than {max_value}')) + if category_in is not None: + category_list = category_in.split(',') + value_range_checks.append(pa.Check.isin(category_list, + error=f'Value not in {category_list}')) + if max_nullable is not None: + value_range_checks.append(pa.Check(lambda s: s.isna().mean() <= max_nullable, + error=f'Too many missing values, must have at least {(1-max_nullable)*100}% non-null values.')) + + # Add the column schema to the schema dictionary + schema_dict[column_name] = pa.Column(dtype,nullable=True, checks=value_range_checks) + + global_checks=[ + pa.Check(lambda df: ~df.duplicated().any(), error="Duplicate rows found."), + pa.Check(lambda df: ~(df.isna().all(axis=1)).any(), error="Empty rows found.") + ] + + return pa.DataFrameSchema(schema_dict, checks=global_checks) + + +# Function to validate schema +def validate_data(schema, dataframe): + """Building schema to validate data using pandera""" + + # Ensure the schema is a pandera schema, if not raise an error + if not isinstance(schema, pa.DataFrameSchema): + raise TypeError("schema must be a pandera dataframe schema.") + + # Ensure the data_frame is a dataframe, if not raise an error + if not isinstance(dataframe, pd.DataFrame): + raise 
TypeError("dataframe must be a pandas data frame.") + + # Ensure the data_frame has observations, if not raise an error + if dataframe.empty: + raise ValueError("dataframe must contain observations.") + + schema.validate(dataframe, lazy=True) + # return print(f"Expected Columns: {expected_columns}, Actual Columns: {actual_columns}") + + + + diff --git a/tests/test_clean_data.py b/tests/test_clean_data.py new file mode 100644 index 0000000..7ed2632 --- /dev/null +++ b/tests/test_clean_data.py @@ -0,0 +1,112 @@ +import pytest +import pandas as pd +import os +import sys +sys.path.append(os.path.join(os.path.dirname(__file__), '..')) +from src.clean_data import extract_column_name,read_data,clean_data,write_data + +# Test files setup +col_name1 = ["col" + str(i) for i in range(32)] # 32 strings +col_name2 = {"1":"apple"} +col_name3 = ['1','2','3'] +col_name4 = ["col" + str(i) for i in range(31)] + [123] # 31 strings + 1 integer + +imported_data1 = pd.DataFrame({ + 'id': [1, 2, 3], + 'class': ['M', 'B', 'M'] + }) +imported_data2 = [1, 2, 3, 4, 5] +drop_columns1=['id'] +drop_columns2={'1':'id'} +relabel1={'M' : 'Malignant','B' : 'Benign'} +relabel2=['M','B'] + +cleaned_data1 = pd.DataFrame({ + 'diagnosis': ['Malignant','Benign','Malignant'], + 'mean_raius': [1, 2, 3] + }) +cleaned_data2 = [1, 2, 3, 4, 5] +# setup empty directory for data files to be downloaded to +if not os.path.exists('tests/test_write_data1'): + os.makedirs('tests/test_write_data1') + +# Tests + +# Tests for extract_column_name + +# test extract_column_name function throws an error +# if the raw name file does not exist +def test_extract_column_name_error_on_missing_file(): + with pytest.raises(FileNotFoundError, match='The raw_name file does not exist.'): + extract_column_name('tests/test_name_data.name') + +# Tests for read_data + +# test read_data function throws an error +# if the raw data file does not exist +def test_read_data_error_on_missing_file(): + with pytest.raises(FileNotFoundError, 
match='The raw_data file does not exist.'): + read_data('tests/test_data.data',col_name1) + +# test read_data function throws an error +# if the col_name is not a list +def test_read_data_error_on_non_list(): + with pytest.raises(TypeError, match="col_name must be a list."): + read_data('tests/test_wdbc.data',col_name2) + +# test read_data function throws an error +# if the col_name does not have 32 values +def test_read_data_error_on_insufficient_list_item(): + with pytest.raises(ValueError, match="col_name must contain exactly 32 items."): + read_data('tests/test_wdbc.data', col_name3) + +# test read_data function throws an error +# if the col_name contains items other than string +def test_read_data_error_on_wrong_item_type(): + with pytest.raises(ValueError, match="col_name must only contain strings."): + read_data('tests/test_wdbc.data', col_name4) + +# Tests for clean_data + +# test clean_data function throws an error +# if the imported_data is not a dataframe +def test_clean_data_error_on_wrong_imported_data_format(): + with pytest.raises(TypeError, match="imported_data must be a data frame."): + clean_data(imported_data2, drop_columns1, relabel1) + +# test clean_data function throws an error +# if the drop_columns is not a list +def test_clean_data_error_on_wrong_drop_columns_format(): + with pytest.raises(TypeError, match="drop_columns must be a list."): + clean_data(imported_data1, drop_columns2, relabel1) + + +# test clean_data function throws an error +# if the relabel is not a dictionary +def test_clean_data_error_on_wrong_relabel_format(): + with pytest.raises(TypeError, match="relabel must be a dictionary"): + clean_data(imported_data1, drop_columns1, relabel2) + +# Tests for write_data + +# test write_data function throws an error +# if the dataframe is not a dataframe +def test_write_data_error_on_wrong_cleaned_data_format(): + with pytest.raises(TypeError, match="dataframe must be a pandas data frame."): + write_data(cleaned_data2, 'tests/', 
'test_write_data1') + +# test write_data function throws an error +# if the write_to path provided does not exist +def test_write_data_error_on_nonexistent_dir(): + with pytest.raises(FileNotFoundError, match='The directory provided does not exist.'): + write_data(cleaned_data1, 'test/', 'test_write_data2') + +# if the directory path provided is not directory +def test_write_data_error_on_missing_dir(): + with pytest.raises(NotADirectoryError, match='The directory path provided is not a directory, it is an existing file path. Please provide a path to a new, or existing directory.'): + write_data(cleaned_data1, 'tests/conftest.py','test_write_data3') + +# if the name_of_file is not a string +def test_read_data_error_on_wrong_name_of_file_format(): + with pytest.raises(TypeError, match='name_of_file must be string.'): + write_data(cleaned_data1, 'tests/', 1) \ No newline at end of file diff --git a/tests/test_cleaned_data.csv b/tests/test_cleaned_data.csv new file mode 100644 index 0000000..d7fb5ea --- /dev/null +++ b/tests/test_cleaned_data.csv @@ -0,0 +1,101 @@ +diagnosis,mean_radius,mean_texture,mean_perimeter,mean_area,mean_smoothness,mean_compactness,mean_concavity,mean_concave_points,mean_symmetry,mean_fractal_dimension,se_radius,se_texture,se_perimeter,se_area,se_smoothness,se_compactness,se_concavity,se_concave_points,se_symmetry,se_fractal_dimension,max_radius,max_texture,max_perimeter,max_area,max_smoothness,max_compactness,max_concavity,max_concave_points,max_symmetry,max_fractal_dimension +Benign,11.16,21.41,70.95,380.3,0.1018,0.05978,0.008955,0.01076,0.1615,0.06144,0.2865,1.678,1.968,18.99,0.006908,0.009442,0.006972,0.006159,0.02694,0.00206,12.36,28.92,79.26,458.0,0.1282,0.1108,0.03582,0.04306,0.2976,0.07123 +Malignant,21.56,22.39,142.0,1479.0,0.111,0.1159,0.2439,0.1389,0.1726,0.05623,1.176,1.256,7.673,158.7,0.0103,0.02891,0.05198,0.02454,0.01114,0.004239,25.45,26.4,166.1,2027.0,0.141,0.2113,0.4107,0.2216,0.206,0.07115 
+Malignant,18.45,21.91,120.2,1075.0,0.0943,0.09709,0.1153,0.06847,0.1692,0.05727,0.5959,1.202,3.766,68.35,0.006001,0.01422,0.02855,0.009148,0.01492,0.002205,22.52,31.39,145.6,1590.0,0.1465,0.2275,0.3965,0.1379,0.3109,0.0761 +Benign,10.94,18.59,70.39,370.0,0.1004,0.0746,0.04944,0.02932,0.1486,0.06615,0.3796,1.743,3.018,25.78,0.009519,0.02134,0.0199,0.01155,0.02079,0.002701,12.4,25.58,82.76,472.4,0.1363,0.1644,0.1412,0.07887,0.2251,0.07732 +Malignant,17.08,27.15,111.2,930.9,0.09898,0.111,0.1007,0.06431,0.1793,0.06281,0.9291,1.152,6.051,115.2,0.00874,0.02219,0.02721,0.01458,0.02045,0.004417,22.96,34.49,152.1,1648.0,0.16,0.2444,0.2639,0.1555,0.301,0.0906 +Malignant,16.24,18.77,108.8,805.1,0.1066,0.1802,0.1948,0.09052,0.1876,0.06684,0.2873,0.9173,2.464,28.09,0.004563,0.03481,0.03872,0.01209,0.01388,0.004081,18.55,25.09,126.9,1031.0,0.1365,0.4706,0.5026,0.1732,0.277,0.1063 +Benign,11.74,14.69,76.31,426.0,0.08099,0.09661,0.06726,0.02639,0.1499,0.06758,0.1924,0.6417,1.345,13.04,0.006982,0.03916,0.04017,0.01528,0.0226,0.006822,12.45,17.6,81.25,473.8,0.1073,0.2793,0.269,0.1056,0.2604,0.09879 +Malignant,20.51,27.81,134.4,1319.0,0.09159,0.1074,0.1554,0.0834,0.1448,0.05592,0.524,1.189,3.767,70.01,0.00502,0.02062,0.03457,0.01091,0.01298,0.002887,24.47,37.38,162.7,1872.0,0.1223,0.2761,0.4146,0.1563,0.2437,0.08328 +Benign,11.6,24.49,74.23,417.2,0.07474,0.05688,0.01974,0.01313,0.1935,0.05878,0.2512,1.786,1.961,18.21,0.006122,0.02337,0.01596,0.006998,0.03194,0.002211,12.44,31.62,81.39,476.5,0.09545,0.1361,0.07239,0.04815,0.3244,0.06745 +Malignant,18.63,25.11,124.8,1088.0,0.1064,0.1887,0.2319,0.1244,0.2183,0.06197,0.8307,1.466,5.574,105.0,0.006248,0.03374,0.05196,0.01158,0.02007,0.00456,23.15,34.01,160.5,1670.0,0.1491,0.4257,0.6133,0.1848,0.3444,0.09782 +Benign,11.95,14.96,77.23,426.7,0.1158,0.1206,0.01171,0.01787,0.2459,0.06581,0.361,1.05,2.455,26.65,0.0058,0.02417,0.007816,0.01052,0.02734,0.003114,12.81,17.72,83.09,496.2,0.1293,0.1885,0.03122,0.04766,0.3124,0.0759 
+Benign,9.676,13.14,64.12,272.5,0.1255,0.2204,0.1188,0.07038,0.2057,0.09575,0.2744,1.39,1.787,17.67,0.02177,0.04888,0.05189,0.0145,0.02632,0.01148,10.6,18.04,69.47,328.1,0.2006,0.3663,0.2913,0.1075,0.2848,0.1364 +Malignant,17.54,19.32,115.1,951.6,0.08968,0.1198,0.1036,0.07488,0.1506,0.05491,0.3971,0.8282,3.088,40.73,0.00609,0.02569,0.02713,0.01345,0.01594,0.002658,20.42,25.84,139.5,1239.0,0.1381,0.342,0.3508,0.1939,0.2928,0.07867 +Benign,13.34,15.86,86.49,520.0,0.1078,0.1535,0.1169,0.06987,0.1942,0.06902,0.286,1.016,1.535,12.96,0.006794,0.03575,0.0398,0.01383,0.02134,0.004603,15.53,23.19,96.66,614.9,0.1536,0.4791,0.4858,0.1708,0.3527,0.1016 +Benign,10.82,24.21,68.89,361.6,0.08192,0.06602,0.01548,0.00816,0.1976,0.06328,0.5196,1.918,3.564,33.0,0.008263,0.0187,0.01277,0.005917,0.02466,0.002977,13.03,31.45,83.9,505.6,0.1204,0.1633,0.06194,0.03264,0.3059,0.07626 +Malignant,18.31,20.58,120.8,1052.0,0.1068,0.1248,0.1569,0.09451,0.186,0.05941,0.5449,0.9225,3.218,67.36,0.006176,0.01877,0.02913,0.01046,0.01559,0.002725,21.86,26.2,142.2,1493.0,0.1492,0.2536,0.3759,0.151,0.3074,0.07863 +Benign,11.49,14.59,73.99,404.9,0.1046,0.08228,0.05308,0.01969,0.1779,0.06574,0.2034,1.166,1.567,14.34,0.004957,0.02114,0.04156,0.008038,0.01843,0.003614,12.4,21.9,82.04,467.6,0.1352,0.201,0.2596,0.07431,0.2941,0.0918 +Malignant,19.4,18.18,127.2,1145.0,0.1037,0.1442,0.1626,0.09464,0.1893,0.05892,0.4709,0.9951,2.903,53.16,0.005654,0.02199,0.03059,0.01499,0.01623,0.001965,23.79,28.65,152.4,1628.0,0.1518,0.3749,0.4316,0.2252,0.359,0.07787 +Malignant,14.9,22.53,102.1,685.0,0.09947,0.2225,0.2733,0.09711,0.2041,0.06898,0.253,0.8749,3.466,24.19,0.006965,0.06213,0.07926,0.02234,0.01499,0.005784,16.35,27.57,125.4,832.7,0.1419,0.709,0.9019,0.2475,0.2866,0.1155 +Benign,14.81,14.7,94.66,680.7,0.08472,0.05016,0.03416,0.02541,0.1659,0.05348,0.2182,0.6232,1.677,20.72,0.006708,0.01197,0.01482,0.01056,0.0158,0.001779,15.61,17.58,101.7,760.2,0.1139,0.1011,0.1101,0.07955,0.2334,0.06142 
+Benign,13.11,22.54,87.02,529.4,0.1002,0.1483,0.08705,0.05102,0.185,0.0731,0.1931,0.9223,1.491,15.09,0.005251,0.03041,0.02526,0.008304,0.02514,0.004198,14.55,29.16,99.48,639.3,0.1349,0.4402,0.3162,0.1126,0.4128,0.1076 +Malignant,17.6,23.33,119.0,980.5,0.09289,0.2004,0.2136,0.1002,0.1696,0.07369,0.9289,1.465,5.801,104.9,0.006766,0.07025,0.06591,0.02311,0.01673,0.0113,21.57,28.87,143.6,1437.0,0.1207,0.4785,0.5165,0.1996,0.2301,0.1224 +Benign,10.16,19.59,64.73,311.7,0.1003,0.07504,0.005025,0.01116,0.1791,0.06331,0.2441,2.09,1.648,16.8,0.01291,0.02222,0.004174,0.007082,0.02572,0.002278,10.65,22.88,67.88,347.3,0.1265,0.12,0.01005,0.02232,0.2262,0.06742 +Benign,8.888,14.64,58.79,244.0,0.09783,0.1531,0.08606,0.02872,0.1902,0.0898,0.5262,0.8522,3.168,25.44,0.01721,0.09368,0.05671,0.01766,0.02541,0.02193,9.733,15.67,62.56,284.4,0.1207,0.2436,0.1434,0.04786,0.2254,0.1084 +Benign,10.48,14.98,67.49,333.6,0.09816,0.1013,0.06335,0.02218,0.1925,0.06915,0.3276,1.127,2.564,20.77,0.007364,0.03867,0.05263,0.01264,0.02161,0.00483,12.13,21.57,81.41,440.4,0.1327,0.2996,0.2939,0.0931,0.302,0.09646 +Malignant,17.57,15.05,115.0,955.1,0.09847,0.1157,0.09875,0.07953,0.1739,0.06149,0.6003,0.8225,4.655,61.1,0.005627,0.03033,0.03407,0.01354,0.01925,0.003742,20.01,19.52,134.9,1227.0,0.1255,0.2812,0.2489,0.1456,0.2756,0.07919 +Benign,12.78,16.49,81.37,502.5,0.09831,0.05234,0.03653,0.02864,0.159,0.05653,0.2368,0.8732,1.471,18.33,0.007962,0.005612,0.01585,0.008662,0.02254,0.001906,13.46,19.76,85.67,554.9,0.1296,0.07061,0.1039,0.05882,0.2383,0.0641 +Benign,11.63,29.29,74.87,415.1,0.09357,0.08574,0.0716,0.02017,0.1799,0.06166,0.3135,2.426,2.15,23.13,0.009861,0.02418,0.04275,0.009215,0.02475,0.002128,13.12,38.81,86.04,527.8,0.1406,0.2031,0.2923,0.06835,0.2884,0.0722 +Benign,11.2,29.37,70.67,386.0,0.07449,0.03558,0.0,0.0,0.106,0.05502,0.3141,3.896,2.041,22.81,0.007594,0.008878,0.0,0.0,0.01989,0.001773,11.92,38.3,75.19,439.6,0.09267,0.05494,0.0,0.0,0.1566,0.05905 
+Benign,13.37,16.39,86.1,553.5,0.07115,0.07325,0.08092,0.028,0.1422,0.05823,0.1639,1.14,1.223,14.66,0.005919,0.0327,0.04957,0.01038,0.01208,0.004076,14.26,22.75,91.99,632.1,0.1025,0.2531,0.3308,0.08978,0.2048,0.07628 +Malignant,17.42,25.56,114.5,948.0,0.1006,0.1146,0.1682,0.06597,0.1308,0.05866,0.5296,1.667,3.767,58.53,0.03113,0.08555,0.1438,0.03927,0.02175,0.01256,18.07,28.07,120.4,1021.0,0.1243,0.1793,0.2803,0.1099,0.1603,0.06818 +Malignant,13.81,23.75,91.56,597.8,0.1323,0.1768,0.1558,0.09176,0.2251,0.07421,0.5648,1.93,3.909,52.72,0.008824,0.03108,0.03112,0.01291,0.01998,0.004506,19.2,41.85,128.5,1153.0,0.2226,0.5209,0.4646,0.2013,0.4432,0.1086 +Benign,12.47,18.6,81.09,481.9,0.09965,0.1058,0.08005,0.03821,0.1925,0.06373,0.3961,1.044,2.497,30.29,0.006953,0.01911,0.02701,0.01037,0.01782,0.003586,14.97,24.64,96.05,677.9,0.1426,0.2378,0.2671,0.1015,0.3014,0.0875 +Benign,14.4,26.99,92.25,646.1,0.06995,0.05223,0.03476,0.01737,0.1707,0.05433,0.2315,0.9112,1.727,20.52,0.005356,0.01679,0.01971,0.00637,0.01414,0.001892,15.4,31.98,100.4,734.6,0.1017,0.146,0.1472,0.05563,0.2345,0.06464 +Benign,11.75,17.56,75.89,422.9,0.1073,0.09713,0.05282,0.0444,0.1598,0.06677,0.4384,1.907,3.149,30.66,0.006587,0.01815,0.01737,0.01316,0.01835,0.002318,13.5,27.98,88.52,552.3,0.1349,0.1854,0.1366,0.101,0.2478,0.07757 +Benign,12.36,21.8,79.78,466.1,0.08772,0.09445,0.06015,0.03745,0.193,0.06404,0.2978,1.502,2.203,20.95,0.007112,0.02493,0.02703,0.01293,0.01958,0.004463,13.83,30.5,91.46,574.7,0.1304,0.2463,0.2434,0.1205,0.2972,0.09261 +Malignant,15.13,29.81,96.71,719.5,0.0832,0.04605,0.04686,0.02739,0.1852,0.05294,0.4681,1.627,3.043,45.38,0.006831,0.01427,0.02489,0.009087,0.03151,0.00175,17.26,36.91,110.1,931.4,0.1148,0.09866,0.1547,0.06575,0.3233,0.06165 +Malignant,21.09,26.57,142.7,1311.0,0.1141,0.2832,0.2487,0.1496,0.2395,0.07398,0.6298,0.7629,4.414,81.46,0.004253,0.04759,0.03872,0.01567,0.01798,0.005295,26.68,33.48,176.5,2089.0,0.1491,0.7584,0.678,0.2903,0.4098,0.1284 
+Malignant,17.35,23.06,111.0,933.1,0.08662,0.0629,0.02891,0.02837,0.1564,0.05307,0.4007,1.317,2.577,44.41,0.005726,0.01106,0.01246,0.007671,0.01411,0.001578,19.85,31.47,128.2,1218.0,0.124,0.1486,0.1211,0.08235,0.2452,0.06515 +Benign,8.671,14.45,54.42,227.2,0.09138,0.04276,0.0,0.0,0.1722,0.06724,0.2204,0.7873,1.435,11.36,0.009172,0.008007,0.0,0.0,0.02711,0.003399,9.262,17.04,58.36,259.2,0.1162,0.07057,0.0,0.0,0.2592,0.07848 +Benign,13.78,15.79,88.37,585.9,0.08817,0.06718,0.01055,0.009937,0.1405,0.05848,0.3563,0.4833,2.235,29.34,0.006432,0.01156,0.007741,0.005657,0.01227,0.002564,15.27,17.5,97.9,706.6,0.1072,0.1071,0.03517,0.03312,0.1859,0.0681 +Malignant,17.95,20.01,114.2,982.0,0.08402,0.06722,0.07293,0.05596,0.2129,0.05025,0.5506,1.214,3.357,54.04,0.004024,0.008422,0.02291,0.009863,0.05014,0.001902,20.58,27.83,129.2,1261.0,0.1072,0.1202,0.2249,0.1185,0.4882,0.06111 +Benign,10.66,15.15,67.49,349.6,0.08792,0.04302,0.0,0.0,0.1928,0.05975,0.3309,1.925,2.155,21.98,0.008713,0.01017,0.0,0.0,0.03265,0.001002,11.54,19.2,73.2,408.3,0.1076,0.06791,0.0,0.0,0.271,0.06164 +Malignant,17.93,24.48,115.2,998.9,0.08855,0.07027,0.05699,0.04744,0.1538,0.0551,0.4212,1.433,2.765,45.81,0.005444,0.01169,0.01622,0.008522,0.01419,0.002751,20.92,34.69,135.1,1320.0,0.1315,0.1806,0.208,0.1136,0.2504,0.07948 +Benign,12.25,22.44,78.18,466.5,0.08192,0.052,0.01714,0.01261,0.1544,0.05976,0.2239,1.139,1.577,18.04,0.005096,0.01205,0.00941,0.004551,0.01608,0.002399,14.17,31.99,92.74,622.9,0.1256,0.1804,0.123,0.06335,0.31,0.08203 +Benign,12.49,16.85,79.19,481.6,0.08511,0.03834,0.004473,0.006423,0.1215,0.05673,0.1716,0.7151,1.047,12.69,0.004928,0.003012,0.00262,0.00339,0.01393,0.001344,13.34,19.71,84.48,544.2,0.1104,0.04953,0.01938,0.02784,0.1917,0.06174 +Benign,11.6,12.84,74.34,412.6,0.08983,0.07525,0.04196,0.0335,0.162,0.06582,0.2315,0.5391,1.475,15.75,0.006153,0.0133,0.01693,0.006884,0.01651,0.002551,13.06,17.16,82.96,512.5,0.1431,0.1851,0.1922,0.08449,0.2772,0.08756 
+Malignant,17.02,23.98,112.8,899.3,0.1197,0.1496,0.2417,0.1203,0.2248,0.06382,0.6009,1.398,3.999,67.78,0.008268,0.03082,0.05042,0.01112,0.02102,0.003854,20.88,32.09,136.1,1344.0,0.1634,0.3559,0.5588,0.1847,0.353,0.08482 +Benign,14.2,20.53,92.41,618.4,0.08931,0.1108,0.05063,0.03058,0.1506,0.06009,0.3478,1.018,2.749,31.01,0.004107,0.03288,0.02821,0.0135,0.0161,0.002744,16.45,27.26,112.1,828.5,0.1153,0.3429,0.2512,0.1339,0.2534,0.07858 +Benign,11.9,14.65,78.11,432.8,0.1152,0.1296,0.0371,0.03003,0.1995,0.07839,0.3962,0.6538,3.021,25.03,0.01017,0.04741,0.02789,0.0111,0.03127,0.009423,13.15,16.51,86.26,509.6,0.1424,0.2517,0.0942,0.06042,0.2727,0.1036 +Benign,9.0,14.4,56.36,246.3,0.07005,0.03116,0.003681,0.003472,0.1788,0.06833,0.1746,1.305,1.144,9.789,0.007389,0.004883,0.003681,0.003472,0.02701,0.002153,9.699,20.07,60.9,285.5,0.09861,0.05232,0.01472,0.01389,0.2991,0.07804 +Benign,13.94,13.17,90.31,594.2,0.1248,0.09755,0.101,0.06615,0.1976,0.06457,0.5461,2.635,4.091,44.74,0.01004,0.03247,0.04763,0.02853,0.01715,0.005528,14.62,15.38,94.52,653.3,0.1394,0.1364,0.1559,0.1015,0.216,0.07253 +Benign,14.97,19.76,95.5,690.2,0.08421,0.05352,0.01947,0.01939,0.1515,0.05266,0.184,1.065,1.286,16.64,0.003634,0.007983,0.008268,0.006432,0.01924,0.00152,15.98,25.82,102.3,782.1,0.1045,0.09995,0.0775,0.05754,0.2646,0.06085 +Benign,10.05,17.53,64.41,310.8,0.1007,0.07326,0.02511,0.01775,0.189,0.06331,0.2619,2.015,1.778,16.85,0.007803,0.01449,0.0169,0.008043,0.021,0.002778,11.16,26.84,71.98,384.0,0.1402,0.1402,0.1055,0.06499,0.2894,0.07664 +Benign,12.22,20.04,79.47,453.1,0.1096,0.1152,0.08175,0.02166,0.2124,0.06894,0.1811,0.7959,0.9857,12.58,0.006272,0.02198,0.03966,0.009894,0.0132,0.003813,13.16,24.17,85.13,515.3,0.1402,0.2315,0.3535,0.08088,0.2709,0.08839 +Malignant,14.68,20.13,94.74,684.5,0.09867,0.072,0.07395,0.05259,0.1586,0.05922,0.4727,1.24,3.195,45.4,0.005718,0.01162,0.01998,0.01109,0.0141,0.002085,19.07,30.88,123.4,1138.0,0.1464,0.1871,0.2914,0.1609,0.3029,0.08216 
+Malignant,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,0.1812,0.05667,0.5435,0.7339,3.398,74.08,0.005225,0.01308,0.0186,0.0134,0.01389,0.003532,24.99,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902 +Benign,11.43,15.39,73.06,399.8,0.09639,0.06889,0.03503,0.02875,0.1734,0.05865,0.1759,0.9938,1.143,12.67,0.005133,0.01521,0.01434,0.008602,0.01501,0.001588,12.32,22.02,79.93,462.0,0.119,0.1648,0.1399,0.08476,0.2676,0.06765 +Malignant,19.55,28.77,133.6,1207.0,0.0926,0.2063,0.1784,0.1144,0.1893,0.06232,0.8426,1.199,7.158,106.4,0.006356,0.04765,0.03863,0.01519,0.01936,0.005252,25.05,36.27,178.6,1926.0,0.1281,0.5329,0.4251,0.1941,0.2818,0.1005 +Benign,9.72,18.22,60.73,288.1,0.0695,0.02344,0.0,0.0,0.1653,0.06447,0.3539,4.885,2.23,21.69,0.001713,0.006736,0.0,0.0,0.03799,0.001688,9.968,20.83,62.25,303.8,0.07117,0.02729,0.0,0.0,0.1909,0.06559 +Malignant,13.28,20.28,87.32,545.2,0.1041,0.1436,0.09847,0.06158,0.1974,0.06782,0.3704,0.8249,2.427,31.33,0.005072,0.02147,0.02185,0.00956,0.01719,0.003317,17.38,28.0,113.1,907.2,0.153,0.3724,0.3664,0.1492,0.3739,0.1027 +Malignant,14.25,22.15,96.42,645.7,0.1049,0.2008,0.2135,0.08653,0.1949,0.07292,0.7036,1.268,5.373,60.78,0.009407,0.07056,0.06899,0.01848,0.017,0.006113,17.67,29.51,119.1,959.5,0.164,0.6247,0.6922,0.1785,0.2844,0.1132 +Malignant,18.77,21.43,122.9,1092.0,0.09116,0.1402,0.106,0.0609,0.1953,0.06083,0.6422,1.53,4.369,88.25,0.007548,0.03897,0.03914,0.01816,0.02168,0.004445,24.54,34.37,161.1,1873.0,0.1498,0.4827,0.4634,0.2048,0.3679,0.0987 +Benign,11.26,19.83,71.3,388.1,0.08511,0.04413,0.005067,0.005664,0.1637,0.06343,0.1344,1.083,0.9812,9.332,0.0042,0.0059,0.003846,0.004065,0.01487,0.002295,11.93,26.43,76.38,435.9,0.1108,0.07723,0.02533,0.02832,0.2557,0.07613 +Benign,7.691,25.44,48.34,170.4,0.08668,0.1199,0.09252,0.01364,0.2037,0.07751,0.2196,1.479,1.445,11.73,0.01547,0.06457,0.09252,0.01364,0.02105,0.007551,8.678,31.89,54.49,223.6,0.1596,0.3064,0.3393,0.05,0.279,0.1066 
+Malignant,13.0,21.82,87.5,519.8,0.1273,0.1932,0.1859,0.09353,0.235,0.07389,0.3063,1.002,2.406,24.32,0.005731,0.03502,0.03553,0.01226,0.02143,0.003749,15.49,30.73,106.2,739.3,0.1703,0.5401,0.539,0.206,0.4378,0.1072 +Malignant,18.25,19.98,119.6,1040.0,0.09463,0.109,0.1127,0.074,0.1794,0.05742,0.4467,0.7732,3.18,53.91,0.004314,0.01382,0.02254,0.01039,0.01369,0.002179,22.88,27.66,153.2,1606.0,0.1442,0.2576,0.3784,0.1932,0.3063,0.08368 +Benign,13.05,18.59,85.09,512.0,0.1082,0.1304,0.09603,0.05603,0.2035,0.06501,0.3106,1.51,2.59,21.57,0.007807,0.03932,0.05112,0.01876,0.0286,0.005715,14.19,24.85,94.22,591.2,0.1343,0.2658,0.2573,0.1258,0.3113,0.08317 +Benign,14.92,14.93,96.45,686.9,0.08098,0.08549,0.05539,0.03221,0.1687,0.05669,0.2446,0.4334,1.826,23.31,0.003271,0.0177,0.0231,0.008399,0.01148,0.002379,17.18,18.22,112.0,906.6,0.1065,0.2791,0.3151,0.1147,0.2688,0.08273 +Benign,11.28,13.39,73.0,384.8,0.1164,0.1136,0.04635,0.04796,0.1771,0.06072,0.3384,1.343,1.851,26.33,0.01127,0.03498,0.02187,0.01965,0.0158,0.003442,11.92,15.77,76.53,434.0,0.1367,0.1822,0.08669,0.08611,0.2102,0.06784 +Malignant,13.82,24.49,92.33,595.9,0.1162,0.1681,0.1357,0.06759,0.2275,0.07237,0.4751,1.528,2.974,39.05,0.00968,0.03856,0.03476,0.01616,0.02434,0.006995,16.01,32.94,106.0,788.0,0.1794,0.3966,0.3381,0.1521,0.3651,0.1183 +Benign,10.25,16.18,66.52,324.2,0.1061,0.1111,0.06726,0.03965,0.1743,0.07279,0.3677,1.471,1.597,22.68,0.01049,0.04265,0.04004,0.01544,0.02719,0.007596,11.28,20.61,71.53,390.4,0.1402,0.236,0.1898,0.09744,0.2608,0.09702 +Benign,14.05,27.15,91.38,600.4,0.09929,0.1126,0.04462,0.04304,0.1537,0.06171,0.3645,1.492,2.888,29.84,0.007256,0.02678,0.02071,0.01626,0.0208,0.005304,15.3,33.17,100.2,706.7,0.1241,0.2264,0.1326,0.1048,0.225,0.08321 +Benign,11.41,14.92,73.53,402.0,0.09059,0.08155,0.06181,0.02361,0.1167,0.06217,0.3344,1.108,1.902,22.77,0.007356,0.03728,0.05915,0.01712,0.02165,0.004784,12.37,17.7,79.12,467.2,0.1121,0.161,0.1648,0.06296,0.1811,0.07427 
+Benign,13.71,18.68,88.73,571.0,0.09916,0.107,0.05385,0.03783,0.1714,0.06843,0.3191,1.249,2.284,26.45,0.006739,0.02251,0.02086,0.01352,0.0187,0.003747,15.11,25.63,99.43,701.9,0.1425,0.2566,0.1935,0.1284,0.2849,0.09031 +Benign,12.3,19.02,77.88,464.4,0.08313,0.04202,0.007756,0.008535,0.1539,0.05945,0.184,1.532,1.199,13.24,0.007881,0.008432,0.007004,0.006522,0.01939,0.002222,13.35,28.46,84.53,544.3,0.1222,0.09052,0.03619,0.03983,0.2554,0.07207 +Benign,11.3,18.19,73.93,389.4,0.09592,0.1325,0.1548,0.02854,0.2054,0.07669,0.2428,1.642,2.369,16.39,0.006663,0.05914,0.0888,0.01314,0.01995,0.008675,12.58,27.96,87.16,472.9,0.1347,0.4848,0.7436,0.1218,0.3308,0.1297 +Benign,8.598,20.98,54.66,221.8,0.1243,0.08963,0.03,0.009259,0.1828,0.06757,0.3582,2.067,2.493,18.39,0.01193,0.03162,0.03,0.009259,0.03357,0.003048,9.565,27.04,62.06,273.9,0.1639,0.1698,0.09001,0.02778,0.2972,0.07712 +Malignant,12.83,22.33,85.26,503.2,0.1088,0.1799,0.1695,0.06861,0.2123,0.07254,0.3061,1.069,2.257,25.13,0.006983,0.03858,0.04683,0.01499,0.0168,0.005617,15.2,30.15,105.3,706.0,0.1777,0.5343,0.6282,0.1977,0.3407,0.1243 +Malignant,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,0.1809,0.05883,0.7572,0.7813,5.438,94.44,0.01149,0.02461,0.05688,0.01885,0.01756,0.005115,22.54,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678 +Malignant,10.95,21.35,71.9,371.1,0.1227,0.1218,0.1044,0.05669,0.1895,0.0687,0.2366,1.428,1.822,16.97,0.008064,0.01764,0.02595,0.01037,0.01357,0.00304,12.84,35.34,87.22,514.0,0.1909,0.2698,0.4023,0.1424,0.2964,0.09606 +Benign,12.89,15.7,84.08,516.6,0.07818,0.0958,0.1115,0.0339,0.1432,0.05935,0.2913,1.389,2.347,23.29,0.006418,0.03961,0.07927,0.01774,0.01878,0.003696,13.9,19.69,92.12,595.6,0.09926,0.2317,0.3344,0.1017,0.1999,0.07127 +Malignant,20.73,31.12,135.7,1419.0,0.09469,0.1143,0.1367,0.08646,0.1769,0.05674,1.172,1.617,7.749,199.7,0.004551,0.01478,0.02143,0.00928,0.01367,0.002299,32.49,47.16,214.0,3432.0,0.1401,0.2644,0.3442,0.1659,0.2868,0.08218 
+Malignant,14.42,19.77,94.48,642.5,0.09752,0.1141,0.09388,0.05839,0.1879,0.0639,0.2895,1.851,2.376,26.85,0.008005,0.02895,0.03321,0.01424,0.01462,0.004452,16.33,30.86,109.5,826.4,0.1431,0.3026,0.3194,0.1565,0.2718,0.09353 +Malignant,15.46,11.89,102.5,736.9,0.1257,0.1555,0.2032,0.1097,0.1966,0.07069,0.4209,0.6583,2.805,44.64,0.005393,0.02321,0.04303,0.0132,0.01792,0.004168,18.79,17.04,125.0,1102.0,0.1531,0.3583,0.583,0.1827,0.3216,0.101 +Malignant,11.84,18.7,77.93,440.6,0.1109,0.1516,0.1218,0.05182,0.2301,0.07799,0.4825,1.03,3.475,41.0,0.005551,0.03414,0.04205,0.01044,0.02273,0.005667,16.82,28.12,119.4,888.7,0.1637,0.5775,0.6956,0.1546,0.4761,0.1402 +Benign,8.571,13.1,54.53,221.3,0.1036,0.07632,0.02565,0.0151,0.1678,0.07126,0.1267,0.6793,1.069,7.254,0.007897,0.01762,0.01801,0.00732,0.01592,0.003925,9.473,18.45,63.3,275.6,0.1641,0.2235,0.1754,0.08512,0.2983,0.1049 +Benign,10.03,21.28,63.19,307.3,0.08117,0.03912,0.00247,0.005159,0.163,0.06439,0.1851,1.341,1.184,11.6,0.005724,0.005697,0.002074,0.003527,0.01445,0.002411,11.11,28.94,69.92,376.3,0.1126,0.07094,0.01235,0.02579,0.2349,0.08061 +Benign,11.51,23.93,74.52,403.5,0.09261,0.1021,0.1112,0.04105,0.1388,0.0657,0.2388,2.904,1.936,16.97,0.0082,0.02982,0.05738,0.01267,0.01488,0.004738,12.48,37.16,82.28,474.2,0.1298,0.2517,0.363,0.09653,0.2112,0.08732 +Benign,13.54,14.36,87.46,566.3,0.09779,0.08129,0.06664,0.04781,0.1885,0.05766,0.2699,0.7886,2.058,23.56,0.008462,0.0146,0.02387,0.01315,0.0198,0.0023,15.11,19.26,99.7,711.2,0.144,0.1773,0.239,0.1288,0.2977,0.07259 +Benign,11.68,16.17,75.49,420.5,0.1128,0.09263,0.04279,0.03132,0.1853,0.06401,0.3713,1.154,2.554,27.57,0.008998,0.01292,0.01851,0.01167,0.02152,0.003213,13.32,21.59,86.57,549.8,0.1526,0.1477,0.149,0.09815,0.2804,0.08024 +Malignant,19.18,22.49,127.5,1148.0,0.08523,0.1428,0.1114,0.06772,0.1767,0.05529,0.4357,1.073,3.833,54.22,0.005524,0.03698,0.02706,0.01221,0.01415,0.003397,23.36,32.06,166.4,1688.0,0.1322,0.5601,0.3865,0.1708,0.3193,0.09221 
+Benign,12.34,14.95,78.29,469.1,0.08682,0.04571,0.02109,0.02054,0.1571,0.05708,0.3833,0.9078,2.602,30.15,0.007702,0.008491,0.01307,0.0103,0.0297,0.001432,13.18,16.85,84.11,533.1,0.1048,0.06744,0.04921,0.04793,0.2298,0.05974 +Malignant,20.44,21.78,133.8,1293.0,0.0915,0.1131,0.09799,0.07785,0.1618,0.05557,0.5781,0.9168,4.218,72.44,0.006208,0.01906,0.02375,0.01461,0.01445,0.001906,24.31,26.37,161.2,1780.0,0.1327,0.2376,0.2702,0.1765,0.2609,0.06735 +Benign,14.04,15.98,89.78,611.2,0.08458,0.05895,0.03534,0.02944,0.1714,0.05898,0.3892,1.046,2.644,32.74,0.007976,0.01295,0.01608,0.009046,0.02005,0.00283,15.66,21.58,101.2,750.0,0.1195,0.1252,0.1117,0.07453,0.2725,0.07234 +Benign,9.755,28.2,61.68,290.9,0.07984,0.04626,0.01541,0.01043,0.1621,0.05952,0.1781,1.687,1.243,11.28,0.006588,0.0127,0.0145,0.006104,0.01574,0.002268,10.67,36.92,68.03,349.9,0.111,0.1109,0.0719,0.04866,0.2321,0.07211 +Benign,12.34,12.27,78.94,468.5,0.09003,0.06307,0.02958,0.02647,0.1689,0.05808,0.1166,0.4957,0.7714,8.955,0.003681,0.009169,0.008732,0.00574,0.01129,0.001366,13.61,19.27,87.22,564.9,0.1292,0.2074,0.1791,0.107,0.311,0.07592 +Malignant,14.99,25.2,95.54,698.8,0.09387,0.05131,0.02398,0.02899,0.1565,0.05504,1.214,2.188,8.077,106.0,0.006883,0.01094,0.01818,0.01917,0.007882,0.001754,14.99,25.2,95.54,698.8,0.09387,0.05131,0.02398,0.02899,0.1565,0.05504 +Benign,10.57,20.22,70.15,338.3,0.09073,0.166,0.228,0.05941,0.2188,0.0845,0.1115,1.231,2.363,7.228,0.008499,0.07643,0.1535,0.02919,0.01617,0.0122,10.85,22.82,76.51,351.9,0.1143,0.3619,0.603,0.1465,0.2597,0.12 +Benign,13.14,20.74,85.98,536.9,0.08675,0.1089,0.1085,0.0351,0.1562,0.0602,0.3152,0.7884,2.312,27.4,0.007295,0.03179,0.04615,0.01254,0.01561,0.00323,14.8,25.46,100.9,689.1,0.1351,0.3549,0.4504,0.1181,0.2563,0.08174 \ No newline at end of file diff --git a/tests/test_data_config.csv b/tests/test_data_config.csv new file mode 100644 index 0000000..93225de --- /dev/null +++ b/tests/test_data_config.csv @@ -0,0 +1,32 @@ 
+column,type,min,max,category,max_nullable +diagnosis,str,,,"Malignant,Benign",0 +mean_radius,float,6,40,,0.1 +mean_texture,float,9,50,,0.1 +mean_perimeter,float,40,260,,0.1 +mean_area,float,140,4300,,0.1 +mean_smoothness,float,0,1,,0.1 +mean_compactness,float,0,2,,0.1 +mean_concavity,float,0,2,,0.1 +mean_concave_points,float,0,1,,0.1 +mean_symmetry,float,0,1,,0.1 +mean_fractal_dimension,float,0,1,,0.1 +se_radius,float,0,3,,0.1 +se_texture,float,0,5,,0.1 +se_perimeter,float,0,22,,0.1 +se_area,float,6,550,,0.1 +se_smoothness,float,0,1,,0.1 +se_compactness,float,0,1,,0.1 +se_concavity,float,0,1,,0.1 +se_concave_points,float,0,1,,0.1 +se_symmetry,float,0,1,,0.1 +se_fractal_dimension,float,0,1,,0.1 +max_radius,float,6,40,,0.1 +max_texture,float,9,50,,0.1 +max_perimeter,float,40,260,,0.1 +max_area,float,140,4300,,0.1 +max_smoothness,float,0,1,,0.1 +max_compactness,float,0,2,,0.1 +max_concavity,float,0,2,,0.1 +max_concave_points,float,0,1,,0.1 +max_symmetry,float,0,1,,0.1 +max_fractal_dimension,float,0,1,,0.1 \ No newline at end of file diff --git a/tests/test_validate_data.py b/tests/test_validate_data.py new file mode 100644 index 0000000..15606ed --- /dev/null +++ b/tests/test_validate_data.py @@ -0,0 +1,154 @@ +import pytest +import os +import pandas as pd +import pandera as pa +import numpy as np +from pandera import Column, DataFrameSchema +import sys +sys.path.append(os.path.join(os.path.dirname(__file__), '..')) +from src.validate_data import build_schema_from_csv, validate_data +from src.clean_data import extract_column_name + +# Test setup for build_schema_from_csv + +invalid_data_config1 = pd.DataFrame({ + 'column':['diagnosis','mean_radius'], + 'type':['str','float'], + 'min':[None,4], + 'max':[None,30] +}) +invalid_data_config2 = pd.DataFrame({ + 'column':['diagnosis','mean_texture'], + 'type':['str','float'], + 'min':[None,4], + 'max':[None,30], + 'category':["Malignant,Benign",None], + 'max_nullable':0.1 +}) +valid_data_config = pd.DataFrame({ + 
'column':['diagnosis','mean_radius'], + 'type':['str','float'], + 'min':[None,6], + 'max':[None,40], + 'category':["Malignant,Benign",None], + 'max_nullable':0.1 +}) + +valid_colnames = ['diagnosis','mean_radius'] +invalid_data_type = [1, 2, 3, 4, 5] + +# Tests for build_schema_from_csv + +# test build_schema_from_csv function throws an error +# if the data_config is not a dataframe +def test_build_schema_from_csv_error_on_wrong_data_config_type(): + with pytest.raises(TypeError, match="data_config must be a pandas dataframe."): + build_schema_from_csv(data_config=invalid_data_type, expected_columns=valid_colnames) + +# if pandas dataframe doesn't have following columns: column,type,max,min,category +def test_build_schema_from_csv_error_on_incorrect_columns(): + with pytest.raises(ValueError, match=f"The data_config must have following columns: 'column', 'type', 'min', 'max', 'category'."): + build_schema_from_csv(data_config=invalid_data_config1, expected_columns=valid_colnames) + +# if the values of 'column' match the column names extracted from name file +def test_build_schedma_from_csv_error_on_mismatch_column_names(): + with pytest.raises(ValueError, match="Column names in the config file do not match the expected columns."): + build_schema_from_csv(data_config=invalid_data_config2, expected_columns=valid_colnames) + +# Tests setup for validate_data function + +data_config_df = pd.read_csv('tests/test_data_config.csv') +colnames = extract_column_name('tests/test_wdbc.names')[1:] #removing column name: 'id' + +valid_schema = build_schema_from_csv(data_config=data_config_df,expected_columns=colnames) +invalid_schema = [1] + +valid_data = pd.read_csv('tests/test_cleaned_data.csv', nrows=3) +empty_data_frame = valid_data.copy().iloc[0:0] + +# Setup list of invalid data cases +invalid_data_cases = [] + +# Case: missing "diagnosis" column +case_missing_class_col = valid_data.copy() +case_missing_class_col = case_missing_class_col.drop("diagnosis", axis=1) # drop 
class column +invalid_data_cases.append((case_missing_class_col, "`diagnosis` from DataFrameSchema")) + +# Case: label in "diagnosis" column encoded as 0 and 1, instead of Benign and Malignant +case_wrong_label_type = valid_data.copy() +case_wrong_label_type["diagnosis"] = case_wrong_label_type["diagnosis"].map({'Benign': 0, 'Malignant': 1}) +invalid_data_cases.append((case_wrong_label_type, "Check incorrect type for'diagnosis' values is missing or incorrect")) + +# Case: wrong string value/category in "diagnosis" column +case_wrong_category_label = valid_data.copy() +case_wrong_category_label.loc[0, "diagnosis"] = "benign" +invalid_data_cases.append((case_wrong_category_label, "Check absent or incorrect for wrong string value/category in 'diagnosis' column")) + +# Case: missing value in "diagnosis" column +case_missing_class = valid_data.copy() +case_missing_class.loc[0, "diagnosis"] = None +invalid_data_cases.append((case_missing_class, "Check absent or incorrect for missing/null 'diagnosis' value")) + +# Case: missing numeric columns (one for each numeric column) where column is missing +numeric_columns = valid_data.select_dtypes(include=np.number).columns +for col in numeric_columns: + case_missing_col = valid_data.copy() + case_missing_col = case_missing_col.drop(col, axis=1) # drop column + invalid_data_cases.append((case_missing_col, f"'{col}' is missing from DataFrameSchema")) + +# Generate 30 cases (one for each numeric column) where data is out of range (too large) +numeric_columns = valid_data.select_dtypes(include=np.number).columns +for col in numeric_columns: + case_too_big = valid_data.copy() + case_too_big[col] = case_too_big[col] + 5000 # Adding an arbitrary value to make it out of range + invalid_data_cases.append((case_too_big, f"Check absent or incorrect for numeric values in '{col}' being too large")) + +# Generate 30 cases (one for each numeric column) where data is out of range (too small) +numeric_columns = 
valid_data.select_dtypes(include=np.number).columns +for col in numeric_columns: + case_too_small = valid_data.copy() + case_too_small[col] = case_too_small[col] - 1000 # Adding an arbitrary value to make it out of range + invalid_data_cases.append((case_too_small, f"Check absent or incorrect for numeric values in '{col}' being too small")) + +# Generate 30 cases (one for each numeric column) where data is the wrong type +numeric_columns = valid_data.select_dtypes(include=np.number).columns +for col in numeric_columns: + case_wrong_type = valid_data.copy() + case_wrong_type[col] = case_wrong_type[col].fillna(0.0).astype(int) # convert from float to int + invalid_data_cases.append((case_wrong_type, f"Check incorrect type for float values in '{col}' is missing or incorrect")) + +# Case: duplicate observations +case_duplicate = valid_data.copy() +case_duplicate = pd.concat([case_duplicate, case_duplicate.iloc[[0], :]], ignore_index=True) +invalid_data_cases.append((case_duplicate, f"Check absent or incorrect for duplicate rows")) + +# Case: entire missing observation +case_missing_obs = valid_data.copy() +nan_row = pd.DataFrame([[np.nan] * (case_missing_obs.shape[1] - 1) + [np.nan]], columns=case_missing_obs.columns) +case_missing_obs = pd.concat([case_missing_obs, nan_row], ignore_index=True) +invalid_data_cases.append((case_missing_obs, f"Check absent or incorrect for missing observations (e.g., a row of all missing values)")) + + +# Tests for validate_data function + +# test build_schema_from_csv function throws an error +# if the schema is invalid pandera dataframe schema +def test_validate_data_error_on_invalid_schema_type(): + with pytest.raises(TypeError, match='schema must be a pandera dataframe schema.'): + validate_data(schema=invalid_schema, dataframe=valid_data) + +# if the dataframe is not a pandas data frame +def test_validate_data_error_on_invalid_dataframe_type(): + with pytest.raises(TypeError, match='dataframe must be a pandas data frame.'): + 
validate_data(schema=valid_schema, dataframe=invalid_data_type) + +# if the dataframe has no observations +def test_validate_data_error_on_empty_dataframe_type(): + with pytest.raises(ValueError): + validate_data(schema=valid_schema, dataframe=empty_data_frame) + +# if the dataframe has invalid data +@pytest.mark.parametrize("invalid_data, description", invalid_data_cases) +def test_valid_w_invalid_data(invalid_data, description): + with pytest.raises(pa.errors.SchemaErrors): + validate_data(schema=valid_schema, dataframe=invalid_data) diff --git a/tests/test_wdbc.data b/tests/test_wdbc.data new file mode 100644 index 0000000..2daaa02 --- /dev/null +++ b/tests/test_wdbc.data @@ -0,0 +1,10 @@ +842302,M,17.99,10.38,122.8,1001,0.1184,0.2776,0.3001,0.1471,0.2419,0.07871,1.095,0.9053,8.589,153.4,0.006399,0.04904,0.05373,0.01587,0.03003,0.006193,25.38,17.33,184.6,2019,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189 +842517,M,20.57,17.77,132.9,1326,0.08474,0.07864,0.0869,0.07017,0.1812,0.05667,0.5435,0.7339,3.398,74.08,0.005225,0.01308,0.0186,0.0134,0.01389,0.003532,24.99,23.41,158.8,1956,0.1238,0.1866,0.2416,0.186,0.275,0.08902 +84300903,M,19.69,21.25,130,1203,0.1096,0.1599,0.1974,0.1279,0.2069,0.05999,0.7456,0.7869,4.585,94.03,0.00615,0.04006,0.03832,0.02058,0.0225,0.004571,23.57,25.53,152.5,1709,0.1444,0.4245,0.4504,0.243,0.3613,0.08758 +84348301,M,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,0.2597,0.09744,0.4956,1.156,3.445,27.23,0.00911,0.07458,0.05661,0.01867,0.05963,0.009208,14.91,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173 +84358402,M,20.29,14.34,135.1,1297,0.1003,0.1328,0.198,0.1043,0.1809,0.05883,0.7572,0.7813,5.438,94.44,0.01149,0.02461,0.05688,0.01885,0.01756,0.005115,22.54,16.67,152.2,1575,0.1374,0.205,0.4,0.1625,0.2364,0.07678 +843786,M,12.45,15.7,82.57,477.1,0.1278,0.17,0.1578,0.08089,0.2087,0.07613,0.3345,0.8902,2.217,27.19,0.00751,0.03345,0.03672,0.01137,0.02165,0.005082,15.47,23.75,103.4,741.6,0.1791,0.5249,0.5355,0.1741,0.3985,0.1244 
+844359,M,18.25,19.98,119.6,1040,0.09463,0.109,0.1127,0.074,0.1794,0.05742,0.4467,0.7732,3.18,53.91,0.004314,0.01382,0.02254,0.01039,0.01369,0.002179,22.88,27.66,153.2,1606,0.1442,0.2576,0.3784,0.1932,0.3063,0.08368 +84458202,M,13.71,20.83,90.2,577.9,0.1189,0.1645,0.09366,0.05985,0.2196,0.07451,0.5835,1.377,3.856,50.96,0.008805,0.03029,0.02488,0.01448,0.01486,0.005412,17.06,28.14,110.6,897,0.1654,0.3682,0.2678,0.1556,0.3196,0.1151 +844981,M,13,21.82,87.5,519.8,0.1273,0.1932,0.1859,0.09353,0.235,0.07389,0.3063,1.002,2.406,24.32,0.005731,0.03502,0.03553,0.01226,0.02143,0.003749,15.49,30.73,106.2,739.3,0.1703,0.5401,0.539,0.206,0.4378,0.1072 +84501001,M,12.46,24.04,83.97,475.9,0.1186,0.2396,0.2273,0.08543,0.203,0.08243,0.2976,1.599,2.039,23.94,0.007149,0.07217,0.07743,0.01432,0.01789,0.01008,15.09,40.68,97.65,711.4,0.1853,1.058,1.105,0.221,0.4366,0.2075 diff --git a/tests/test_wdbc.names b/tests/test_wdbc.names new file mode 100644 index 0000000..3af8990 --- /dev/null +++ b/tests/test_wdbc.names @@ -0,0 +1,140 @@ +1. Title: Wisconsin Diagnostic Breast Cancer (WDBC) + +2. Source Information + +a) Creators: + + Dr. William H. Wolberg, General Surgery Dept., University of + Wisconsin, Clinical Sciences Center, Madison, WI 53792 + wolberg@eagle.surgery.wisc.edu + + W. Nick Street, Computer Sciences Dept., University of + Wisconsin, 1210 West Dayton St., Madison, WI 53706 + street@cs.wisc.edu 608-262-6619 + + Olvi L. Mangasarian, Computer Sciences Dept., University of + Wisconsin, 1210 West Dayton St., Madison, WI 53706 + olvi@cs.wisc.edu + +b) Donor: Nick Street + +c) Date: November 1995 + +3. Past Usage: + +first usage: + + W.N. Street, W.H. Wolberg and O.L. Mangasarian + Nuclear feature extraction for breast tumor diagnosis. + IS&T/SPIE 1993 International Symposium on Electronic Imaging: Science + and Technology, volume 1905, pages 861-870, San Jose, CA, 1993. + +OR literature: + + O.L. Mangasarian, W.N. Street and W.H. Wolberg. 
+ Breast cancer diagnosis and prognosis via linear programming. + Operations Research, 43(4), pages 570-577, July-August 1995. + +Medical literature: + + W.H. Wolberg, W.N. Street, and O.L. Mangasarian. + Machine learning techniques to diagnose breast cancer from + fine-needle aspirates. + Cancer Letters 77 (1994) 163-171. + + W.H. Wolberg, W.N. Street, and O.L. Mangasarian. + Image analysis and machine learning applied to breast cancer + diagnosis and prognosis. + Analytical and Quantitative Cytology and Histology, Vol. 17 + No. 2, pages 77-87, April 1995. + + W.H. Wolberg, W.N. Street, D.M. Heisey, and O.L. Mangasarian. + Computerized breast cancer diagnosis and prognosis from fine + needle aspirates. + Archives of Surgery 1995;130:511-516. + + W.H. Wolberg, W.N. Street, D.M. Heisey, and O.L. Mangasarian. + Computer-derived nuclear features distinguish malignant from + benign breast cytology. + Human Pathology, 26:792--796, 1995. + +See also: + http://www.cs.wisc.edu/~olvi/uwmp/mpml.html + http://www.cs.wisc.edu/~olvi/uwmp/cancer.html + +Results: + + - predicting field 2, diagnosis: B = benign, M = malignant + - sets are linearly separable using all 30 input features + - best predictive accuracy obtained using one separating plane + in the 3-D space of Worst Area, Worst Smoothness and + Mean Texture. Estimated accuracy 97.5% using repeated + 10-fold crossvalidations. Classifier has correctly + diagnosed 176 consecutive new patients as of November + 1995. + +4. Relevant information + + Features are computed from a digitized image of a fine needle + aspirate (FNA) of a breast mass. They describe + characteristics of the cell nuclei present in the image. + A few of the images can be found at + http://www.cs.wisc.edu/~street/images/ + + Separating plane described above was obtained using + Multisurface Method-Tree (MSM-T) [K. P. Bennett, "Decision Tree + Construction Via Linear Programming." 
Proceedings of the 4th + Midwest Artificial Intelligence and Cognitive Science Society, + pp. 97-101, 1992], a classification method which uses linear + programming to construct a decision tree. Relevant features + were selected using an exhaustive search in the space of 1-4 + features and 1-3 separating planes. + + The actual linear program used to obtain the separating plane + in the 3-dimensional space is that described in: + [K. P. Bennett and O. L. Mangasarian: "Robust Linear + Programming Discrimination of Two Linearly Inseparable Sets", + Optimization Methods and Software 1, 1992, 23-34]. + + + This database is also available through the UW CS ftp server: + + ftp ftp.cs.wisc.edu + cd math-prog/cpo-dataset/machine-learn/WDBC/ + +5. Number of instances: 569 + +6. Number of attributes: 32 (ID, diagnosis, 30 real-valued input features) + +7. Attribute information + +1) ID number +2) Diagnosis (M = malignant, B = benign) +3-32) + +Ten real-valued features are computed for each cell nucleus: + + a) radius (mean of distances from center to points on the perimeter) + b) texture (standard deviation of gray-scale values) + c) perimeter + d) area + e) smoothness (local variation in radius lengths) + f) compactness (perimeter^2 / area - 1.0) + g) concavity (severity of concave portions of the contour) + h) concave points (number of concave portions of the contour) + i) symmetry + j) fractal dimension ("coastline approximation" - 1) + +Several of the papers listed above contain detailed descriptions of +how these features are computed. + +The mean, standard error, and "worst" or largest (mean of the three +largest values) of these features were computed for each image, +resulting in 30 features. For instance, field 3 is Mean Radius, field +13 is Radius SE, field 23 is Worst Radius. + +All feature values are recoded with four significant digits. + +8. Missing attribute values: none + +9. Class distribution: 357 benign, 212 malignant \ No newline at end of file