diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 00000000..d4bb2cbb --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,20 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line, and also +# from the environment for the first two. +SPHINXOPTS ?= +SPHINXBUILD ?= sphinx-build +SOURCEDIR = . +BUILDDIR = _build + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). +%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/docs/_static/css/custom.css b/docs/_static/css/custom.css new file mode 100644 index 00000000..aa36f2e9 --- /dev/null +++ b/docs/_static/css/custom.css @@ -0,0 +1,4 @@ +.wy-side-nav-search { + background-color: #2980b9; + color: #fcfcfc; +} diff --git a/docs/bias_fairness.rst b/docs/bias_fairness.rst new file mode 100644 index 00000000..9173f55f --- /dev/null +++ b/docs/bias_fairness.rst @@ -0,0 +1,8 @@ + +Bias & Fairness +========================================= + +.. automodule:: ydata_quality.bias_fairness + :members: + :noindex: + diff --git a/docs/conf.py b/docs/conf.py new file mode 100644 index 00000000..69203146 --- /dev/null +++ b/docs/conf.py @@ -0,0 +1,63 @@ +# Configuration file for the Sphinx documentation builder. +# +# This file only contains a selection of the most common options. For a full +# list see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html + +# -- Path setup -------------------------------------------------------------- + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. 
If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. +# NOTE(review): assumes the ydata_quality package lives in ../src relative to docs/ -- confirm. +import os +import sys +sys.path.insert(0, os.path.abspath('../src')) + + +# -- Project information ----------------------------------------------------- + +project = 'ydata-quality' +copyright = '2021, YData' +author = 'YData' + + +# -- General configuration --------------------------------------------------- + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. +extensions = ['sphinx.ext.autodoc', 'sphinx.ext.coverage', 'sphinx.ext.napoleon'] + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +# This pattern also affects html_static_path and html_extra_path. +exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] + + +# -- Options for HTML output ------------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +# +html_theme = 'sphinx_rtd_theme' + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". 
+html_static_path = ['_static'] + +html_css_files = [ + 'css/custom.css', +] + +# -- Napoleon configuration --------------------------------------------------- +napoleon_google_docstring = True +napoleon_numpy_docstring = False + + +# -- YData configuration ------------------------------------------------------ +html_logo = 'ydata_logo.png' diff --git a/docs/data_expectations.rst b/docs/data_expectations.rst new file mode 100644 index 00000000..ec384261 --- /dev/null +++ b/docs/data_expectations.rst @@ -0,0 +1,8 @@ + +Data Expectations +========================================= + +.. automodule:: ydata_quality.data_expectations + :members: + :noindex: + diff --git a/docs/data_quality.rst b/docs/data_quality.rst new file mode 100644 index 00000000..1cc46f8f --- /dev/null +++ b/docs/data_quality.rst @@ -0,0 +1,7 @@ + +Data Quality +========================================= + +.. automodule:: ydata_quality.core.data_quality + :members: + :noindex: diff --git a/docs/drift.rst b/docs/drift.rst new file mode 100644 index 00000000..ee72faaf --- /dev/null +++ b/docs/drift.rst @@ -0,0 +1,8 @@ + +Drift Analysis +========================================= + +.. automodule:: ydata_quality.drift + :members: + :noindex: + diff --git a/docs/duplicates.rst b/docs/duplicates.rst new file mode 100644 index 00000000..77579127 --- /dev/null +++ b/docs/duplicates.rst @@ -0,0 +1,8 @@ + +Duplicates +========================================= + +.. automodule:: ydata_quality.duplicates + :members: + :noindex: + diff --git a/docs/index.rst b/docs/index.rst new file mode 100644 index 00000000..7e516b89 --- /dev/null +++ b/docs/index.rst @@ -0,0 +1,23 @@ +.. ydata-quality documentation master file, created by + sphinx-quickstart on Wed Sep 8 23:46:14 2021. + You can adapt this file completely to your liking, but it should at least + contain the root `toctree` directive. + +Welcome to ydata-quality's documentation! +========================================= + +.. 
toctree:: + :maxdepth: 2 + :caption: Core: + + data_quality + bias_fairness + data_expectations + drift + duplicates + labelling + missings + valued_missing_values + warnings + + diff --git a/docs/labelling.rst b/docs/labelling.rst new file mode 100644 index 00000000..cee54ad7 --- /dev/null +++ b/docs/labelling.rst @@ -0,0 +1,14 @@ + +Labelling +========================================= + +.. automodule:: ydata_quality.labelling + :members: + :noindex: + +.. autoclass:: ydata_quality.labelling.engine.CategoricalLabelInspector + :members: + +.. autoclass:: ydata_quality.labelling.engine.NumericalLabelInspector + :members: + diff --git a/docs/make.bat b/docs/make.bat new file mode 100644 index 00000000..2119f510 --- /dev/null +++ b/docs/make.bat @@ -0,0 +1,35 @@ +@ECHO OFF + +pushd %~dp0 + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set SOURCEDIR=. +set BUILDDIR=_build + +if "%1" == "" goto help + +%SPHINXBUILD% >NUL 2>NUL +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. + echo.If you don't have Sphinx installed, grab it from + echo.http://sphinx-doc.org/ + exit /b 1 +) + +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% +goto end + +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% + +:end +popd diff --git a/docs/missings.rst b/docs/missings.rst new file mode 100644 index 00000000..0061e773 --- /dev/null +++ b/docs/missings.rst @@ -0,0 +1,8 @@ + +Missings +========================================= + +.. 
automodule:: ydata_quality.missings + :members: + :noindex: + diff --git a/docs/valued_missing_values.rst b/docs/valued_missing_values.rst new file mode 100644 index 00000000..7a1dc310 --- /dev/null +++ b/docs/valued_missing_values.rst @@ -0,0 +1,8 @@ + +Valued Missing Values +========================================= + +.. automodule:: ydata_quality.valued_missing_values + :members: + :noindex: + diff --git a/docs/warnings.rst b/docs/warnings.rst new file mode 100644 index 00000000..21adada7 --- /dev/null +++ b/docs/warnings.rst @@ -0,0 +1,30 @@ +======== +Warnings +======== + +Structure +--------- +A QualityWarning contains all the data a Data Scientist needs to understand and assess the impact of a given data quality issue found during the data quality evaluation. + +A QualityWarning is composed of: + * Category: name of the main test suite (e.g. Duplicates, Bias & Fairness) + * Test: name of the individual test (e.g. Exact Duplicates, Performance Bias) + * Description: long-text description of the data quality details + * Priority: expected impact of the data quality warning + * Data: sample of data that showcases the data quality warning + +Priorities +---------- +The Priority aims to provide a quick, intuitive indication of the severity of a QualityWarning. + +======== ============ +Priority Description +======== ============ +P0 Blocker. Critical issues that block using the dataset. +P1 High. Heavy impact expected on downstream application. +P2 Medium. Allows usage but may block human-intelligible insights. +P3 Low. Minor impact, aesthetic. No impact on downstream application. +======== ============ + +Technically, a Priority is implemented as an OrderedEnum so that we can apply comparison operators (<, <=, >, >=). More details on OrderedEnum are available in the utils sub-package. 
+ diff --git a/docs/ydata_logo.png b/docs/ydata_logo.png new file mode 100644 index 00000000..d41bbb80 Binary files /dev/null and b/docs/ydata_logo.png differ diff --git a/requirements-dev.txt b/requirements-dev.txt index 9bfbee1c..7a46dcee 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,5 +1,6 @@ isort pylint pytest -sphinx +sphinx==4.1.2 +sphinx_rtd_theme myst-parser