Commit 37c7f6e

add new ocr cards (#670)

aymanechilah authored Sep 28, 2023
1 parent 200beb0 commit 37c7f6e

Showing 18 changed files with 672 additions and 93 deletions.
Table detection card:

```diff
@@ -25,7 +25,7 @@ Here it is used the CascadeTabNet general model for table detection inspired by
 ## Predicted Entities
 
 {:.btn-box}
-<button class="button button-orange" disabled>Live Demo</button>
+[Live Demo](https://demo.johnsnowlabs.com/ocr/IMAGE_TABLE_DETECTION_ONLY/){:.button.button-orange.button-orange-trans.co.button-icon}
 [Open in Colab](https://colab.research.google.com/github/JohnSnowLabs/spark-ocr-workshop/blob/master/jupyter/Cards/SparkOcrImageTableDetection.ipynb){:.button.button-orange.button-orange-trans.co.button-icon}
 [Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/clinical/ocr/general_model_table_detection_v2_en_3.3.0_3.0_1623301511401.zip){:.button.button-orange.button-orange-trans.arr.button-icon.hidden}
 [Copy S3 URI](s3://auxdata.johnsnowlabs.com/clinical/ocr/general_model_table_detection_v2_en_3.3.0_3.0_1623301511401.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3}
```
Visual NER 10-K filings card:

```diff
@@ -26,7 +26,7 @@ This is a Form Recognition / Key Value extraction model, trained on the summary
 `KEY`, `VALUE`, `HEADER`
 
 {:.btn-box}
-[Live Demo](https://nlp.johnsnowlabs.com/demos){:.button.button-orange.button-orange-trans.co.button-icon}
+[Live Demo](https://demo.johnsnowlabs.com/finance/VISUALNER_10KFILINGS/){:.button.button-orange.button-orange-trans.co.button-icon}
 [Open in Colab](https://github.com/JohnSnowLabs/spark-nlp-workshop/blob/master/finance-nlp/90.2.Financial_Visual_NER.ipynb){:.button.button-orange.button-orange-trans.co.button-icon}
 [Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/clinical/ocr/visualner_keyvalue_10kfilings_en_4.0.0_3.2_1663781115795.zip){:.button.button-orange.button-orange-trans.arr.button-icon.hidden}
 [Copy S3 URI](s3://auxdata.johnsnowlabs.com/clinical/ocr/visualner_keyvalue_10kfilings_en_4.0.0_3.2_1663781115795.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3}
```
ocr_base_handwritten_v2 card:

```diff
@@ -49,7 +49,7 @@ text_detector = ImageTextDetectorV2 \
     .setLinkThreshold(0.3) \
     .setWidth(500)
 
-ocr = ImageToTextV2Opt.pretrained("ocr_base_handwritten_v2", "en", "clinical/ocr") \
+ocr = ImageToTextV2.pretrained("ocr_base_handwritten_v2", "en", "clinical/ocr") \
     .setInputCols(["image", "text_regions"]) \
     .setGroupImages(True) \
     .setOutputCol("text") \
@@ -91,7 +91,7 @@ val text_detector = ImageTextDetectorV2
     .setLinkThreshold(0.3)
     .setWidth(500)
 
-val ocr = ImageToTextV2Opt
+val ocr = ImageToTextV2
     .pretrained("ocr_base_handwritten_v2", "en", "clinical/ocr")
     .setInputCols(Array("image", "text_regions"))
     .setGroupImages(True)
```
ocr_base_handwritten_v2_opt card:

```diff
@@ -50,7 +50,7 @@ text_detector = ImageTextDetectorV2 \
     .setLinkThreshold(0.3) \
     .setWidth(500)
 
-ocr = ImageToTextV2Opt.pretrained("ocr_base_handwritten_v2_opt", "en", "clinical/ocr") \
+ocr = ImageToTextV2.pretrained("ocr_base_handwritten_v2_opt", "en", "clinical/ocr") \
     .setInputCols(["image", "text_regions"]) \
     .setGroupImages(True) \
     .setOutputCol("text") \
@@ -92,7 +92,7 @@ val text_detector = ImageTextDetectorV2
     .setLinkThreshold(0.3)
     .setWidth(500)
 
-val ocr = ImageToTextV2Opt
+val ocr = ImageToTextV2
     .pretrained("ocr_base_handwritten_v2_opt", "en", "clinical/ocr")
     .setInputCols(Array("image", "text_regions"))
     .setGroupImages(True)
```
ocr_base_printed_v2 card:

```diff
@@ -50,7 +50,7 @@ text_detector = ImageTextDetectorV2 \
     .setLinkThreshold(0.3) \
     .setWidth(500)
 
-ocr = ImageToTextV2Opt.pretrained("ocr_base_printed_v2", "en", "clinical/ocr") \
+ocr = ImageToTextV2.pretrained("ocr_base_printed_v2", "en", "clinical/ocr") \
     .setInputCols(["image", "text_regions"]) \
     .setGroupImages(True) \
     .setOutputCol("text") \
@@ -90,7 +90,7 @@ val text_detector = ImageTextDetectorV2
     .setLinkThreshold(0.3)
     .setWidth(500)
 
-val ocr = ImageToTextV2Opt
+val ocr = ImageToTextV2
     .pretrained("ocr_base_printed_v2", "en", "clinical/ocr")
     .setInputCols(Array("image", "text_regions"))
     .setGroupImages(True)
```
ocr_base_printed_v2_opt card:

```diff
@@ -49,7 +49,7 @@ text_detector = ImageTextDetectorV2 \
     .setLinkThreshold(0.3) \
     .setWidth(500)
 
-ocr = ImageToTextV2Opt.pretrained("ocr_base_printed_v2_opt", "en", "clinical/ocr") \
+ocr = ImageToTextV2.pretrained("ocr_base_printed_v2_opt", "en", "clinical/ocr") \
     .setInputCols(["image", "text_regions"]) \
     .setGroupImages(True) \
     .setOutputCol("text") \
@@ -89,7 +89,7 @@ val text_detector = ImageTextDetectorV2
     .setLinkThreshold(0.3)
     .setWidth(500)
 
-val ocr = ImageToTextV2Opt
+val ocr = ImageToTextV2
     .pretrained("ocr_base_printed_v2_opt", "en", "clinical/ocr")
     .setInputCols(Array("image", "text_regions"))
     .setGroupImages(True)
```
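All four hunks above make the same rename, `ImageToTextV2Opt` to `ImageToTextV2`. For context, here is a minimal sketch of the full pipeline these cards document, assembled from the diff context shown above; the imports, the `BinaryToImage` stage, the `image_text_detector_v2` model name, and the placeholder input path are assumptions drawn from the related workshop notebooks, not from this commit.

```python
# Sketch only: stages mirror the diff context above; imports, the
# BinaryToImage front end, the detector model name, and the input
# path are assumptions, not part of this commit.
from pyspark.ml import PipelineModel
from sparkocr.transformers import *

binary_to_image = BinaryToImage() \
    .setInputCol("content") \
    .setOutputCol("image")

text_detector = ImageTextDetectorV2 \
    .pretrained("image_text_detector_v2", "en", "clinical/ocr") \
    .setInputCol("image") \
    .setOutputCol("text_regions") \
    .setLinkThreshold(0.3) \
    .setWidth(500)

ocr = ImageToTextV2.pretrained("ocr_base_printed_v2", "en", "clinical/ocr") \
    .setInputCols(["image", "text_regions"]) \
    .setGroupImages(True) \
    .setOutputCol("text")

pipeline = PipelineModel(stages=[binary_to_image, text_detector, ocr])

result = pipeline.transform(spark.read.format("binaryFile").load("path/to/image.png"))
```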
New file (116 additions):
---
layout: model
title: DiT model pretrained on IIT-CDIP and finetuned on RVL-CDIP for document classification
author: John Snow Labs
name: dit_base_finetuned_rvlcdip
date: 2023-07-11
tags: [en, licensed]
task: OCR Document Classification
language: en
nav_key: models
edition: Visual NLP 4.0.0
spark_version: 3.2.1
supported: true
annotator: VisualDocumentClassifierv3
article_header:
type: cover
use_language_switcher: "Python-Scala-Java"
---

## Description

DiT was proposed in [DiT: Self-supervised Pre-training for Document Image Transformer](https://arxiv.org/abs/2203.02378) by Junlong Li, Yiheng Xu, Tengchao Lv, Lei Cui, Cha Zhang, and Furu Wei. DiT applies the self-supervised objective of BEiT (BERT pre-training of Image Transformers) to 42 million document images. This model was fine-tuned for document image classification on the RVL-CDIP dataset (a collection of 400,000 images, each belonging to one of 16 classes).

The abstract from the paper is the following: Image Transformer has recently achieved significant progress for natural image understanding, either using supervised (ViT, DeiT, etc.) or self-supervised (BEiT, MAE, etc.) pre-training techniques. In this paper, we propose DiT, a self-supervised pre-trained Document Image Transformer model using large-scale unlabeled text images for Document AI tasks, which is essential since no supervised counterparts ever exist due to the lack of human labeled document images. We leverage DiT as the backbone network in a variety of vision-based Document AI tasks, including document image classification, document layout analysis, as well as table detection. Experiment results have illustrated that the self-supervised pre-trained DiT model achieves new state-of-the-art results on these downstream tasks, e.g. document image classification (91.11 → 92.69), document layout analysis (91.0 → 94.9) and table detection (94.23 → 96.55).


## Predicted Entities



{:.btn-box}
[Live Demo](https://demo.johnsnowlabs.com/ocr/IMAGE_CLASSIFIER/){:.button.button-orange.button-orange-trans.co.button-icon}
[Open in Colab](https://github.com/JohnSnowLabs/spark-ocr-workshop/blob/master/tutorials/Certification_Trainings/5.2.Visual_Document_Classifier_v3.ipynb){:.button.button-orange.button-orange-trans.co.button-icon}
[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/clinical/ocr/dit_base_finetuned_rvlcdip_en_3.3.0_3.0_1654798502586.zip){:.button.button-orange.button-orange-trans.arr.button-icon}

## How to use

<div class="tabs-box" markdown="1">
{% include programmingLanguageSelectScalaPythonNLU.html %}

```python
import pkg_resources
from pyspark.ml import PipelineModel
from sparkocr.transformers import *
from sparkocr.enums import ImageType

# Convert the raw binary content into an image column
binary_to_image = BinaryToImage() \
    .setInputCol("content") \
    .setOutputCol("image") \
    .setImageType(ImageType.TYPE_3BYTE_BGR)

# Pretrained DiT document classifier
doc_class = VisualDocumentClassifierV3 \
    .pretrained("dit_base_finetuned_rvlcdip", "en", "clinical/ocr") \
    .setInputCols(["image"]) \
    .setOutputCol("label")

# OCR pipeline
pipeline = PipelineModel(stages=[
    binary_to_image,
    doc_class
])

test_image_path = pkg_resources.resource_filename('sparkocr', 'resources/ocr/visualdoc/00556614_00556648.tif')
bin_df = spark.read.format("binaryFile").load(test_image_path).limit(50)

results = pipeline.transform(bin_df).cache()
```
```scala
import org.apache.spark.ml.Pipeline

val binary_to_image = new BinaryToImage()
    .setInputCol("content")
    .setOutputCol("image")
    .setImageType(ImageType.TYPE_3BYTE_BGR)

val doc_class = VisualDocumentClassifierV3
    .pretrained("dit_base_finetuned_rvlcdip", "en", "clinical/ocr")
    .setInputCols(Array("image"))
    .setOutputCol("label")

// OCR pipeline
val pipeline = new Pipeline().setStages(Array(
    binary_to_image,
    doc_class))

// pkg_resources is Python-only; point this at a local copy of the sample image
val test_image_path = "resources/ocr/visualdoc/00556614_00556648.tif"
val bin_df = spark.read.format("binaryFile").load(test_image_path).limit(50)

val results = pipeline.fit(bin_df).transform(bin_df).cache()
```
</div>
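For a quick check of the classifier output (a minimal usage sketch, assuming the `results` DataFrame produced by the snippet above), the predicted class can be read straight off the `label` column; the Output section below shows the same value:

```python
# Display the predicted document class for each input image;
# "label" is the output column configured on the classifier above.
results.select("label").show(truncate=False)
```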

## Example

### Input:
![Screenshot](/assets/images/examples_ocr/image1.png)

### Output:
```bash
+-------+
|label |
+-------+
|invoice|
+-------+
```


{:.model-param}
## Model Information

{:.table-model}
|---|---|
|Model Name:|dit_base_finetuned_rvlcdip|
|Type:|ocr|
|Compatibility:|Visual NLP 4.0.0+|
|License:|Licensed|
|Edition:|Official|
|Language:|en|
|Size:|319.6 MB|

## References

IIT-CDIP, RVL-CDIP
New file: docs/_posts/aymanechilah/2023-07-11-tabform_v1_en_3_2.md (115 additions, 0 deletions):
---
layout: model
title: Table and Form Detection
author: John Snow Labs
name: tabform_v1
date: 2023-07-11
tags: [en, licensed]
task: Table and Form Detection
language: en
nav_key: models
edition: Visual NLP 4.3.0
spark_version: 3.2.1
supported: true
article_header:
type: cover
use_language_switcher: "Python-Scala-Java"
---

## Description

Model for table and form detection in documents. It is based on a text detection model with additional post-processing.

## Predicted Entities

{:.btn-box}
[Live Demo](https://demo.johnsnowlabs.com/ocr/IMAGE_TABLE_FORM_DETECTION/){:.button.button-orange.button-orange-trans.co.button-icon}
[Open in Colab](https://github.com/JohnSnowLabs/spark-ocr-workshop/blob/master/jupyter/Cards/SparkOcrImageTableAndFormDetection.ipynb){:.button.button-orange.button-orange-trans.co.button-icon}
[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/clinical/ocr/tabform_v1_en_4.2.5_3.2_1677478327651.zip){:.button.button-orange.button-orange-trans.arr.button-icon.hidden}


## How to use

<div class="tabs-box" markdown="1">
{% include programmingLanguageSelectScalaPythonNLU.html %}

```python
from pyspark.ml import PipelineModel
from sparkocr.transformers import *
from sparkocr.enums import *  # assumed source of ImageType and Color

binary_to_image = BinaryToImage() \
    .setImageType(ImageType.TYPE_3BYTE_BGR)

region_detector = ImageDocumentRegionDetector.pretrained("tabform_v1", "en", "clinical/ocr") \
    .setInputCol("image") \
    .setOutputCol("regions") \
    .setScoreThreshold(0.25)

draw_regions = ImageDrawRegions() \
    .setInputCol("image") \
    .setInputRegionsCol("regions") \
    .setOutputCol("image_with_regions") \
    .setRectColor(Color.red)

pipeline = PipelineModel(stages=[
    binary_to_image,
    region_detector,
    draw_regions
])

imagePath = "data/tabform_images/irs_sp_1.jpg"
image_df = spark.read.format("binaryFile").load(imagePath)

result = pipeline.transform(image_df)
```
```scala
import org.apache.spark.ml.Pipeline

val binary_to_image = new BinaryToImage()
    .setImageType(ImageType.TYPE_3BYTE_BGR)

val region_detector = ImageDocumentRegionDetector
    .pretrained("tabform_v1", "en", "clinical/ocr")
    .setInputCol("image")
    .setOutputCol("regions")
    .setScoreThreshold(0.25)

val draw_regions = new ImageDrawRegions()
    .setInputCol("image")
    .setInputRegionsCol("regions")
    .setOutputCol("image_with_regions")
    .setRectColor(Color.red)

val pipeline = new Pipeline().setStages(Array(
    binary_to_image,
    region_detector,
    draw_regions))

val imagePath = "data/tabform_images/irs_sp_1.jpg"
val image_df = spark.read.format("binaryFile").load(imagePath)

val result = pipeline.fit(image_df).transform(image_df)
```
</div>
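To sanity-check the detector before rendering (a minimal sketch, assuming the `result` DataFrame from the snippet above), the raw region structs in the `regions` column can be printed directly; the `image_with_regions` column holds the visualization shown in the example below:

```python
# Print the detected table and form regions;
# "regions" is the output column configured on the detector above.
result.select("regions").show(truncate=False)
```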

## Example

{%- capture input_image -%}
![Screenshot](/assets/images/examples_ocr/tabform_input.png)
{%- endcapture -%}

{%- capture output_image -%}
![Screenshot](/assets/images/examples_ocr/tabform_output.png)
{%- endcapture -%}


{% include templates/input_output_image.md
input_image=input_image
output_image=output_image
%}

## Model Information

{:.table-model}
|---|---|
|Model Name:|tabform_v1|
|Type:|ocr|
|Compatibility:|Visual NLP 4.3.0+|
|License:|Licensed|
|Edition:|Official|
|Language:|en|
