def features_plot(data, target):
    '''
    Show an interactive scatter plot of one numerical feature against the target.

    A dropdown lists every numerical column of `data` except `target`.
    Selecting an entry plots that column (x-axis) against the target
    column (y-axis).

    Parameters
    ------------
    data : DataFrame
        The data to plot. Columns are looked up by name through
        `data.dtypes` and `data[...]`, so a pandas DataFrame is expected.

    target : string or integer
        The target (label) column name in the dataset. It must refer to an
        integer or floating point column.

    Returns
    -------
    The value returned by `ipywidgets.interact` for the plotting callback.

    Raises
    ------
    ValueError
        If `data` is None, if the target column is not an integer or float
        column, or if there is no numerical feature column besides the
        target.
    '''
    if data is None:
        raise ValueError("data: Expecting a DataFrame or Series, got 'None'")

    def is_continuous(dtype):
        # dtype.kind is 'i' (signed int), 'u' (unsigned int) or 'f' (float)
        # for every width, so int32/float32 columns are accepted as well,
        # not only int64/float64. Booleans ('b') and objects ('O') are not.
        return dtype.kind in ('i', 'u', 'f')

    # A missing target surfaces as the KeyError raised by the column lookup.
    if not is_continuous(data[target].dtype):
        raise ValueError("target: target not a continous value")

    features = [
        name for name, dtype in data.dtypes.items()
        if name != target and is_continuous(dtype)
    ]
    if not features:
        # Fail early instead of handing an empty dropdown to the widget.
        raise ValueError(
            "data: no numerical feature column to plot against the target")

    # Imported lazily (like the autoviz import in this module) and only after
    # validation, so bad input is reported even without ipywidgets installed.
    from ipywidgets import widgets

    def plot_feature(column):
        plt.plot(data[column], data[target], '.')
        plt.xlabel(column)
        # Label the axis with the real target name, not the word 'target'.
        plt.ylabel(str(target))

    return widgets.interact(plot_feature, column=features)