From 8d8ffafbb0d85ca95eb06039028a91b580a99efa Mon Sep 17 00:00:00 2001 From: Sanjana Date: Tue, 13 Feb 2024 09:13:27 -0800 Subject: [PATCH] Links update (#251) * Updated desc for odd size in readme * fixed language * Added odd size issue to issues description in tutorial: * Removed unnecessary line causing a warning message * Updated instructions for skipping notebook execution * Updated absolute links to relative links in documentation * added hidden tags to dataset download cells * Updated link checker' * Updated link checker' * Updated link checker' * Updated link checker' * Updated link checker' * Updated tutorial * Revert accidentally hidden cells * Updated tqdm to tqdm.auto * Updated docs requirements * Updated tutorial notebooks * Updated tags --- .github/workflows/links.yml | 13 ++++++--- DEVELOPMENT.md | 2 +- README.md | 2 +- docs/requirements.txt | 29 +++++++------------ docs/source/conf.py | 1 - docs/source/faq.rst | 2 +- docs/source/index.rst | 8 ++--- docs/source/tutorials/custom_issue_manager.py | 2 +- .../tutorials/huggingface_dataset.ipynb | 23 ++++++++++++--- .../tutorials/torchvision_dataset.ipynb | 11 ++++--- docs/source/tutorials/tutorial.ipynb | 25 +++++++++++----- 11 files changed, 72 insertions(+), 46 deletions(-) diff --git a/.github/workflows/links.yml b/.github/workflows/links.yml index ee0f1da7..f5bef932 100644 --- a/.github/workflows/links.yml +++ b/.github/workflows/links.yml @@ -17,10 +17,15 @@ jobs: find . -name '*.html' -delete - run: | find . -name '*.md' -exec pandoc -i {} -o {}.html \; - - uses: anishathalye/proof-html@v1 + - uses: anishathalye/proof-html@v2 with: directory: . 
+ check_html: false check_favicon: false - empty_alt_ignore: true - url_ignore_re: | - ^https:\/\/twitter.com\/CleanlabAI \ No newline at end of file + ignore_missing_alt: true + ignore_empty_alt: true + tokens: | + {"https://github.com": "${{ secrets.GITHUB_TOKEN }}"} + swap_urls: | + {"^(\\..*)\\.md(#?.*)$": "\\1.md.html\\2", + "^(https://github\\.com/.*)#.*$": "\\1"} \ No newline at end of file diff --git a/DEVELOPMENT.md b/DEVELOPMENT.md index b9506a1c..08fec546 100644 --- a/DEVELOPMENT.md +++ b/DEVELOPMENT.md @@ -123,7 +123,7 @@ pip install -r docs/requirements.txt sphinx-build docs/source cleanvision-docs ``` -**Note for faster build**: Executing the Jupyter Notebooks (i.e., the .ipynb files) that make up some portion of the docs, such as the tutorials, takes a long time. If you want to skip rendering these, set the environment variable `SKIP_NOTEBOOKS=1`. You can either set this using `export SKIP_NOTEBOOKS=1` +**Note for faster build**: Executing the Jupyter Notebooks (i.e., the .ipynb files) that make up some portion of the docs, such as the tutorials, takes a long time. If you want to skip rendering these, add `nbsphinx_execute = 'never'` to [sphinx configuration](docs/source/conf.py). 4. To view the docs open the file `cleanvision-docs/index.html` file in a browser. 
diff --git a/README.md b/README.md index 49b530df..8abf44e9 100644 --- a/README.md +++ b/README.md @@ -89,7 +89,7 @@ In any collection of image files (most [formats](https://pillow.readthedocs.io/e | 6 | Light | Irregularly bright images (*over*exposed) | light | ![](https://raw.githubusercontent.com/cleanlab/assets/master/cleanvision/example_issue_images/light.jpg) | | 7 | Grayscale | Images lacking color | grayscale | ![](https://raw.githubusercontent.com/cleanlab/assets/master/cleanvision/example_issue_images/grayscale.jpg) | | 8 | Odd Aspect Ratio | Images with an unusual aspect ratio (overly skinny/wide) | odd_aspect_ratio | ![](https://raw.githubusercontent.com/cleanlab/assets/master/cleanvision/example_issue_images/odd_aspect_ratio.jpg) | -| 9 | Odd Size | Images that are abnormally large or small | odd_size | | +| 9 | Odd Size | Images that are abnormally large or small compared to the rest of the dataset | odd_size | | CleanVision supports Linux, macOS, and Windows and runs on Python 3.7+. 
diff --git a/docs/requirements.txt b/docs/requirements.txt index 7e8d2e17..1174e4af 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1,19 +1,12 @@ -sphinx==5.1.1 -sphinx-tabs==3.4.1 -nbsphinx==0.8.8 -autodocsumm==0.2.9 +sphinx==7.1.2 +sphinx-tabs==3.4.5 +nbsphinx==0.9.3 +autodocsumm==0.2.12 sphinx-multiversion==0.2.4 -sphinx-copybutton==0.5.0 -sphinxcontrib-katex==0.8.6 -sphinx-autodoc-typehints==1.19.2 -furo==2022.06.21 -numpy>=1.20.0 -pandas>=1.1.5 -Pillow>=9.3 -matplotlib>=3.4 -tqdm>=4.53.0 -imagehash>=4.2.0 -datasets>=2.7.0 -torchvision>=0.12.0 -ipykernel==6.8.0 -ipywidgets==7.6.5 +sphinx-copybutton==0.5.2 +sphinxcontrib-katex==0.9.9 +sphinx-autodoc-typehints==1.25.2 +furo==2023.09.10 +ipykernel==6.29.0 +ipywidgets==8.1.1 +ipython==8.0.1 \ No newline at end of file diff --git a/docs/source/conf.py b/docs/source/conf.py index ea986507..5dc5aa02 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -77,7 +77,6 @@ html_title = "" html_theme = "furo" -html_static_path = ["_static"] html_logo = "https://raw.githubusercontent.com/cleanlab/assets/master/cleanlab/cleanlab_logo_only.png" html_theme_options = { diff --git a/docs/source/faq.rst b/docs/source/faq.rst index 34e00264..23fe78b0 100644 --- a/docs/source/faq.rst +++ b/docs/source/faq.rst @@ -10,7 +10,7 @@ CleanVision is independent of any machine learning tasks as it directly works on 2. **Can I check for specific issues in my dataset?** -Yes, you can specify issues like ``light`` or ``blurry`` in the issue_types argument when calling ``Imagelab.find_issues`` +Yes, you can specify issues like ``light`` or ``blurry`` in the issue_types argument when calling :py:meth:`~cleanvision.imagelab.Imagelab.find_issues` .. 
code-block:: python3 diff --git a/docs/source/index.rst b/docs/source/index.rst index 3bb9c871..a05586fe 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -5,7 +5,7 @@ Documentation ======================================= -CleanVision automatically detects various issues in image datasets, such as images that are: (near) duplicates, blurry, +CleanVision automatically detects various issues in your image data, such as images that are: (near) duplicates, blurry, over/under-exposed, etc. This data-centric AI package is designed as a quick first step for any computer vision project to find problems in your dataset, which you may want to address before applying machine learning. @@ -120,9 +120,9 @@ CleanVision works smoothly with Torchvision datasets too: Additional Resources -------------------- -- Get started with our `Example Notebook `_ -- Explore more `Example Notebooks `_ -- Learn how to contribute in the `Contribution Guide `_ +- Get started with `Starter Tutorial `_. +- View more `code examples `_ that demonstrate how to use CleanVision on various datasets. +- Interested in contributing to CleanVision? Check out our `Contribution Guide `_ to get started. .. 
toctree:: diff --git a/docs/source/tutorials/custom_issue_manager.py b/docs/source/tutorials/custom_issue_manager.py index df737733..823264ee 100644 --- a/docs/source/tutorials/custom_issue_manager.py +++ b/docs/source/tutorials/custom_issue_manager.py @@ -3,7 +3,7 @@ import numpy as np import pandas as pd from PIL import Image -from tqdm import tqdm +from tqdm.auto import tqdm from cleanvision.dataset.base_dataset import Dataset from cleanvision.issue_managers import register_issue_manager diff --git a/docs/source/tutorials/huggingface_dataset.ipynb b/docs/source/tutorials/huggingface_dataset.ipynb index ff53cb4a..fb8e6e98 100644 --- a/docs/source/tutorials/huggingface_dataset.ipynb +++ b/docs/source/tutorials/huggingface_dataset.ipynb @@ -44,6 +44,19 @@ "from cleanvision import Imagelab" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "nbsphinx": "hidden" + }, + "outputs": [], + "source": [ + "import warnings\n", + "\n", + "warnings.filterwarnings(\"ignore\")" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -60,7 +73,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "dataset = load_dataset(\"cats_vs_dogs\", split=\"train\")" @@ -184,7 +199,7 @@ "metadata": {}, "outputs": [], "source": [ - "imagelab.issues" + "imagelab.issues.head()" ] }, { @@ -243,7 +258,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "**For more detailed guide on how to use CleanVision, check the [tutorial notebook](https://github.com/cleanlab/cleanvision/blob/main/docs/source/tutorials/tutorial.ipynb).**" + "**For more detailed guide on how to use CleanVision, check the** [tutorial notebook](tutorial.ipynb)." 
] } ], @@ -263,7 +278,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.5" + "version": "3.11.7" } }, "nbformat": 4, diff --git a/docs/source/tutorials/torchvision_dataset.ipynb b/docs/source/tutorials/torchvision_dataset.ipynb index 35bbf207..290aa5cf 100644 --- a/docs/source/tutorials/torchvision_dataset.ipynb +++ b/docs/source/tutorials/torchvision_dataset.ipynb @@ -70,9 +70,12 @@ "cell_type": "code", "execution_count": null, "id": "3d207006", - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ + "%%capture\n", "train_set = CIFAR10(root=\"./\", download=True)\n", "test_set = CIFAR10(root=\"./\", train=False, download=True)" ] @@ -200,7 +203,7 @@ "metadata": {}, "outputs": [], "source": [ - "imagelab.issues" + "imagelab.issues.head()" ] }, { @@ -264,7 +267,7 @@ "id": "75912aea", "metadata": {}, "source": [ - "**For more detailed guide on how to use CleanVision, check the [tutorial notebook](https://github.com/cleanlab/cleanvision/blob/main/docs/source/tutorials/tutorial.ipynb).**" + "**For more detailed guide on how to use CleanVision, check the** [tutorial notebook](tutorial.ipynb)." ] } ], @@ -284,7 +287,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.0" + "version": "3.10.0" } }, "nbformat": 4, diff --git a/docs/source/tutorials/tutorial.ipynb b/docs/source/tutorials/tutorial.ipynb index b5b2fdda..8b756846 100644 --- a/docs/source/tutorials/tutorial.ipynb +++ b/docs/source/tutorials/tutorial.ipynb @@ -59,6 +59,7 @@ "| 6 | Blurry | Images that are blurry or out of focus | blurry |\n", "| 7 | Grayscale | Images that are grayscale (lacking color) | grayscale |\n", "| 8 | Low Information | Images that lack much information (e.g. 
a completely black image with a few white dots) | low_information |\n", + "| 9 | Odd Size | Images that are abnormally large or small compared to the rest of the dataset | odd_size |\n", "\n", "\n", "The **Issue Key** column specifies the name for each type of issue in CleanVision code. See our examples which use these keys to detect only particular issue types and specify nondefault parameter settings to use when checking for certain issues." @@ -150,7 +151,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "The main way to interface with your data is via the [Imagelab](https://cleanvision.readthedocs.io/en/latest/cleanvision/imagelab.html#cleanvision.imagelab.Imagelab) class. This class can be used to understand the issues in your dataset at a high level (global overview) and low level (issues and quality scores for each image) as well as additional information about the dataset. It has three main attributes:\n", + "The main way to interface with your data is via the [Imagelab](../cleanvision/imagelab.rst#cleanvision.imagelab.Imagelab) class. This class can be used to understand the issues in your dataset at a high level (global overview) and low level (issues and quality scores for each image) as well as additional information about the dataset. It has three main attributes:\n", "\n", "- `Imagelab.issue_summary`\n", "- `Imagelab.issues`\n", @@ -645,7 +646,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "You can also create a custom issue type by extending the base class `IssueManager`. CleanVision can then detect your custom issue along with other pre-defined issues in any image dataset! Here's an example of a custom issue manager, which can also be found in the [examples/](https://github.com/cleanlab/cleanvision/blob/main/examples/custom_issue_manager.py) folder of the source code." 
+ "You can also create a custom issue type by extending the base class [IssueManager](../cleanvision/utils/base_issue_manager.rst#cleanvision.utils.base_issue_manager.IssueManager). CleanVision can then detect your custom issue along with other pre-defined issues in any image dataset! Here's an example of a custom issue manager, which can also be found [here](https://github.com/cleanlab/cleanvision/blob/main/docs/source/tutorials/custom_issue_manager.py)" ] }, { @@ -659,7 +660,7 @@ "import numpy as np\n", "import pandas as pd\n", "from PIL import Image\n", - "from tqdm import tqdm\n", + "from tqdm.auto import tqdm\n", "\n", "from cleanvision.dataset.base_dataset import Dataset\n", "from cleanvision.issue_managers import register_issue_manager\n", @@ -778,11 +779,21 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "issue_types = {issue_name: {}}\n", - "imagelab.find_issues(issue_types)\n", + "imagelab.find_issues(issue_types)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ "imagelab.report()" ] }, @@ -791,7 +802,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Beyond the collection of image files demonstrated here, you can alternatively run CleanVision on: [Hugging Face datasets](https://github.com/cleanlab/cleanvision/blob/main/docs/source/tutorials/huggingface_dataset.ipynb), [torchvision datasets](https://github.com/cleanlab/cleanvision/blob/main/docs/source/tutorials/torchvision_dataset.ipynb), as well as [files in cloud storage buckets like S3, GCS, or Azure](https://github.com/cleanlab/cleanvision-examples/blob/main/cloud_dataset.ipynb)." 
+ "Beyond the collection of image files demonstrated here, you can alternatively run CleanVision on: [Hugging Face datasets](huggingface_dataset.ipynb), [torchvision datasets](torchvision_dataset.ipynb), as well as [files in cloud storage buckets like S3, GCS, or Azure](https://github.com/cleanlab/cleanvision-examples/blob/main/cloud_dataset.ipynb)." ] } ], @@ -811,7 +822,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.2" + "version": "3.11.7" } }, "nbformat": 4,