From 24f7b6eae1b75dc77e5ea21ad46fe021ebee1592 Mon Sep 17 00:00:00 2001
From: Ze-Yi LIN <58305964+Zeyi-Lin@users.noreply.github.com>
Date: Tue, 24 Sep 2024 05:17:18 +0800
Subject: [PATCH] feat: api image base64 (#166)

* update base64

* update api docs
---
 deploy_api.py     | 92 +++++++++++++++++++++++++++++++----------------
 docs/api_CN.md    | 36 +++++++++++++++----
 docs/api_EN.md    | 28 +++++++++++----
 hivision/utils.py | 16 +++++++--
 4 files changed, 124 insertions(+), 48 deletions(-)

diff --git a/deploy_api.py b/deploy_api.py
index 6f4230e2..b01e2d73 100644
--- a/deploy_api.py
+++ b/deploy_api.py
@@ -1,4 +1,4 @@
-from fastapi import FastAPI, UploadFile, Form
+from fastapi import FastAPI, UploadFile, Form, File
 from hivision import IDCreator
 from hivision.error import FaceError
 from hivision.creator.layout_calculator import (
@@ -10,7 +10,7 @@
     add_background,
     resize_image_to_kb,
     bytes_2_base64,
-    numpy_2_base64,
+    base64_2_numpy,
     hex_to_rgb,
     add_watermark,
     save_image_dpi_to_bytes,
@@ -38,7 +38,8 @@
 # 证件照智能制作接口
 @app.post("/idphoto")
 async def idphoto_inference(
-    input_image: UploadFile,
+    input_image: UploadFile = File(None),
+    input_image_base64: str = Form(None),
     height: int = Form(413),
     width: int = Form(295),
     human_matting_model: str = Form("modnet_photographic_portrait_matting"),
@@ -50,10 +51,15 @@ async def idphoto_inference(
     head_height_ratio: float = 0.45,
     top_distance_max: float = 0.12,
     top_distance_min: float = 0.10,
-):
-    image_bytes = await input_image.read()
-    nparr = np.frombuffer(image_bytes, np.uint8)
-    img = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
+):  
+    # 如果传入了base64，则直接使用base64解码
+    if input_image_base64:
+        img = base64_2_numpy(input_image_base64)
+    # 否则使用上传的图片
+    else:
+        image_bytes = await input_image.read()
+        nparr = np.frombuffer(image_bytes, np.uint8)
+        img = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
 
     # ------------------- 选择抠图与人脸检测模型 -------------------
     choose_handler(creator, human_matting_model, face_detect_model)
@@ -91,13 +97,17 @@ async def idphoto_inference(
 # 人像抠图接口
 @app.post("/human_matting")
 async def human_matting_inference(
-    input_image: UploadFile,
+    input_image: UploadFile = File(None),
+    input_image_base64: str = Form(None),
     human_matting_model: str = Form("hivision_modnet"),
     dpi: int = Form(300),
 ):
-    image_bytes = await input_image.read()
-    nparr = np.frombuffer(image_bytes, np.uint8)
-    img = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
+    if input_image_base64:
+        img = base64_2_numpy(input_image_base64)
+    else:
+        image_bytes = await input_image.read()
+        nparr = np.frombuffer(image_bytes, np.uint8)
+        img = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
 
     # ------------------- 选择抠图与人脸检测模型 -------------------
     choose_handler(creator, human_matting_model, None)
@@ -122,7 +132,8 @@ async def human_matting_inference(
 # 透明图像添加纯色背景接口
 @app.post("/add_background")
 async def photo_add_background(
-    input_image: UploadFile,
+    input_image: UploadFile = File(None),
+    input_image_base64: str = Form(None),
     color: str = Form("000000"),
     kb: int = Form(None),
     dpi: int = Form(300),
@@ -130,9 +141,12 @@ async def photo_add_background(
 ):
     render_choice = ["pure_color", "updown_gradient", "center_gradient"]
 
-    image_bytes = await input_image.read()
-    nparr = np.frombuffer(image_bytes, np.uint8)
-    img = cv2.imdecode(nparr, cv2.IMREAD_UNCHANGED)
+    if input_image_base64:
+        img = base64_2_numpy(input_image_base64)
+    else:
+        image_bytes = await input_image.read()
+        nparr = np.frombuffer(image_bytes, np.uint8)
+        img = cv2.imdecode(nparr, cv2.IMREAD_UNCHANGED)
 
     color = hex_to_rgb(color)
     color = (color[2], color[1], color[0])
@@ -160,16 +174,20 @@ async def photo_add_background(
 # 六寸排版照生成接口
 @app.post("/generate_layout_photos")
 async def generate_layout_photos(
-    input_image: UploadFile,
+    input_image: UploadFile = File(None),
+    input_image_base64: str = Form(None),
     height: int = Form(413),
     width: int = Form(295),
     kb: int = Form(None),
     dpi: int = Form(300),
 ):
     # try:
-    image_bytes = await input_image.read()
-    nparr = np.frombuffer(image_bytes, np.uint8)
-    img = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
+    if input_image_base64:
+        img = base64_2_numpy(input_image_base64)
+    else:
+        image_bytes = await input_image.read()
+        nparr = np.frombuffer(image_bytes, np.uint8)
+        img = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
 
     size = (int(height), int(width))
 
@@ -202,7 +220,8 @@ async def generate_layout_photos(
 # 透明图像添加水印接口
 @app.post("/watermark")
 async def watermark(
-    input_image: UploadFile,
+    input_image: UploadFile = File(None),
+    input_image_base64: str = Form(None),
     text: str = Form("Hello"),
     size: int = 20,
     opacity: float = 0.5,
@@ -212,9 +231,12 @@ async def watermark(
     kb: int = Form(None),
     dpi: int = Form(300),
 ):
-    image_bytes = await input_image.read()
-    nparr = np.frombuffer(image_bytes, np.uint8)
-    img = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
+    if input_image_base64:
+        img = base64_2_numpy(input_image_base64)
+    else:
+        image_bytes = await input_image.read()
+        nparr = np.frombuffer(image_bytes, np.uint8)
+        img = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
 
     try:
         result_image = add_watermark(img, text, size, opacity, angle, color, space)
@@ -242,13 +264,17 @@ async def watermark(
 # 设置照片KB值接口(RGB图)
 @app.post("/set_kb")
 async def set_kb(
-    input_image: UploadFile,
+    input_image: UploadFile = File(None),
+    input_image_base64: str = Form(None),
     dpi: int = Form(300),
     kb: int = Form(50),
 ):
-    image_bytes = await input_image.read()
-    nparr = np.frombuffer(image_bytes, np.uint8)
-    img = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
+    if input_image_base64:
+        img = base64_2_numpy(input_image_base64)
+    else:
+        image_bytes = await input_image.read()
+        nparr = np.frombuffer(image_bytes, np.uint8)
+        img = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
 
     try:
         result_image = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
@@ -271,7 +297,8 @@ async def set_kb(
 # 证件照智能裁剪接口
 @app.post("/idphoto_crop")
 async def idphoto_crop_inference(
-    input_image: UploadFile,
+    input_image: UploadFile = File(None),
+    input_image_base64: str = Form(None),
     height: int = Form(413),
     width: int = Form(295),
     face_detect_model: str = Form("mtcnn"),
@@ -282,9 +309,12 @@ async def idphoto_crop_inference(
     top_distance_max: float = 0.12,
     top_distance_min: float = 0.10,
 ):
-    image_bytes = await input_image.read()
-    nparr = np.frombuffer(image_bytes, np.uint8)
-    img = cv2.imdecode(nparr, cv2.IMREAD_UNCHANGED)  # 读取图像(4通道)
+    if input_image_base64:
+        img = base64_2_numpy(input_image_base64)
+    else:
+        image_bytes = await input_image.read()
+        nparr = np.frombuffer(image_bytes, np.uint8)
+        img = cv2.imdecode(nparr, cv2.IMREAD_UNCHANGED)  # 读取图像(4通道)
 
     # ------------------- 选择抠图与人脸检测模型 -------------------
     choose_handler(creator, face_detect_option=face_detect_model)
diff --git a/docs/api_CN.md b/docs/api_CN.md
index 24dc6e6a..8af37cdc 100644
--- a/docs/api_CN.md
+++ b/docs/api_CN.md
@@ -7,6 +7,13 @@
 
 - [开始之前：开启后端服务](#开始之前开启后端服务)
 - [接口功能说明](#接口功能说明)
+  - [1.生成证件照(底透明)](#1生成证件照底透明)
+  - [2.添加背景色](#2添加背景色)
+  - [3.生成六寸排版照](#3生成六寸排版照)
+  - [4.人像抠图](#4人像抠图)
+  - [5.图像加水印](#5图像加水印)
+  - [6.设置图像KB大小](#6设置图像kb大小)
+  - [7.证件照裁切](#7证件照裁切)
 - [cURL 请求示例](#curl-请求示例)
 - [Python 请求示例](#python-请求示例)
 
@@ -40,7 +47,8 @@ python deploy_api.py
 
 | 参数名 | 类型 | 必填 | 说明 |
 | :--- | :--- | :--- | :--- |
-| input_image | file | 是 | 传入的图像文件，图像文件为需为RGB三通道图像。 |
+| input_image | file | 和`input_image_base64`二选一 | 传入的图像文件，图像文件需为RGB三通道图像。 |
+| input_image_base64 | str | 和`input_image`二选一 | 传入的图像文件的base64编码，图像文件需为RGB三通道图像。 |
 | height | int | 否 | 标准证件照高度，默认为`413` |
 | width | int | 否 | 标准证件照宽度，默认为`295` |
 | human_matting_model | str | 否 | 人像分割模型，默认为`modnet_photographic_portrait_matting`。可选值为`modnet_photographic_portrait_matting`、`hivision_modnet`、`rmbg-1.4`、`birefnet-v1-lite` |
@@ -62,6 +70,8 @@ python deploy_api.py
 | image_base64_standard | str | 标准证件照的base64编码 |
 | image_base64_hd | str | 高清证件照的base64编码。如`hd`参数为`false`，则不返回该参数 |
 
+<br>
+
 ### 2.添加背景色
 
 接口名：`add_background`
@@ -72,7 +82,8 @@ python deploy_api.py
 
 | 参数名 | 类型 | 必填 | 说明 |
 | :--- | :--- | :--- | :--- |
-| input_image | file | 是 | 传入的图像文件，图像文件为需为RGBA四通道图像。 |
+| input_image | file | 和`input_image_base64`二选一 | 传入的图像文件，图像文件需为RGBA四通道图像。 |
+| input_image_base64 | str | 和`input_image`二选一 | 传入的图像文件的base64编码，图像文件需为RGBA四通道图像。 |
 | color | str | 否 | 背景色HEX值，默认为`000000` |
 | kb | int | 否 | 输出照片的 KB 值，默认为`None`，即不对图像进行KB调整。|
 | render | int | 否 | 渲染模式，默认为`0`。可选值为`0`、`1`、`2`，分别对应`纯色`、`上下渐变`、`中心渐变`。 |
@@ -85,6 +96,8 @@ python deploy_api.py
 | status | int | 状态码，`true`表示成功 |
 | image_base64 | str | 添加背景色之后的图像的base64编码 |
 
+<br>
+
 ### 3.生成六寸排版照
 
 接口名：`generate_layout_photos`
@@ -95,7 +108,8 @@ python deploy_api.py
 
 | 参数名 | 类型 | 必填 | 说明 |
 | :--- | :--- | :--- | :--- |
-| input_image | file | 是 | 传入的图像文件，图像文件为需为RGB三通道图像。 |
+| input_image | file | 和`input_image_base64`二选一 | 传入的图像文件，图像文件需为RGB三通道图像。 |
+| input_image_base64 | str | 和`input_image`二选一 | 传入的图像文件的base64编码，图像文件需为RGB三通道图像。 |
 | height | int | 否 | 输入图像的高度，默认为`413` |
 | width | int | 否 | 输入图像的宽度，默认为`295` |
 | kb | int | 否 | 输出照片的 KB 值，默认为`None`，即不对图像进行KB调整。|
@@ -108,6 +122,8 @@ python deploy_api.py
 | status | int | 状态码，`true`表示成功 |
 | image_base64 | str | 六寸排版照的base64编码 |
 
+<br>
+
 ### 4.人像抠图
 
 接口名：`human_matting`
@@ -129,6 +145,7 @@ python deploy_api.py
 | status | int | 状态码，`true`表示成功 |
 | image_base64 | str | 抠图人像照的base64编码 |
 
+<br>
 
 ### 5.图像加水印
 
@@ -140,7 +157,8 @@ python deploy_api.py
 
 | 参数名 | 类型 | 必填 | 说明 |
 | :--- | :--- | :--- | :--- |
-| input_image | file | 是 | 传入的图像文件，图像文件为需为RGB三通道图像。 |
+| input_image | file | 和`input_image_base64`二选一 | 传入的图像文件，图像文件需为RGB三通道图像。 |
+| input_image_base64 | str | 和`input_image`二选一 | 传入的图像文件的base64编码，图像文件需为RGB三通道图像。 |
 | text | str | 否 | 水印文本，默认为`Hello` |
 | size | int | 否 | 水印字体大小，默认为`20` |
 | opacity | float | 否 | 水印透明度，默认为`0.5` |
@@ -156,6 +174,8 @@ python deploy_api.py
 | status | int | 状态码，`true`表示成功 |
 | image_base64 | str | 添加水印之后的图像的base64编码 |
 
+<br>
+
 ### 6.设置图像KB大小
 
 接口名：`set_kb`
@@ -166,7 +186,8 @@ python deploy_api.py
 
 | 参数名 | 类型 | 必填 | 说明 |
 | :--- | :--- | :--- | :--- |
-| input_image | file | 是 | 传入的图像文件，图像文件为需为RGB三通道图像。 |
+| input_image | file | 和`input_image_base64`二选一 | 传入的图像文件，图像文件需为RGB三通道图像。 |
+| input_image_base64 | str | 和`input_image`二选一 | 传入的图像文件的base64编码，图像文件需为RGB三通道图像。 |
 | kb | int | 否 | 输出照片的 KB 值，默认为`None`，即不对图像进行KB调整。|
 | dpi | int | 否 | 图像分辨率，默认为`300` |
 
@@ -177,7 +198,7 @@ python deploy_api.py
 | status | int | 状态码，`true`表示成功 |
 | image_base64 | str | 设置KB大小之后的图像的base64编码 |
 
-
+<br>
 
 ### 7.证件照裁切
 
@@ -189,7 +210,8 @@ python deploy_api.py
 
 | 参数名 | 类型 | 必填 | 说明 |
 | :--- | :--- | :--- | :--- |
-| input_image | file | 是 | 传入的图像文件，图像文件为需为RGBA四通道图像。 |
+| input_image | file | 和`input_image_base64`二选一 | 传入的图像文件，图像文件需为RGBA四通道图像。 |
+| input_image_base64 | str | 和`input_image`二选一 | 传入的图像文件的base64编码，图像文件需为RGBA四通道图像。 |
 | height | int | 否 | 标准证件照高度，默认为`413` |
 | width | int | 否 | 标准证件照宽度，默认为`295` |
 | face_detect_model | str | 否 | 人脸检测模型，默认为`mtcnn`。可选值为`mtcnn`、`face_plusplus`、`retinaface-resnet50` |
diff --git a/docs/api_EN.md b/docs/api_EN.md
index 13e42c83..b2c1abae 100644
--- a/docs/api_EN.md
+++ b/docs/api_EN.md
@@ -6,6 +6,13 @@ English / [中文](README.md)
 
 - [Before You Start: Start the Backend Service](#before-you-start-start-the-backend-service)
 - [API Functionality Description](#api-functionality-description)
+  - [1. Generate ID Photo (Transparent Background)](#1-generate-id-photo-transparent-background)
+  - [2. Add Background Color](#2-add-background-color)
+  - [3. Generate Six-Inch Layout Photo](#3-generate-six-inch-layout-photo)
+  - [4. Human Matting](#4-human-matting)
+  - [5. Add Watermark to Image](#5-add-watermark-to-image)
+  - [6. Set Image KB Size](#6-set-image-kb-size)
+  - [7. ID Photo Cropping](#7-id-photo-cropping)
 - [cURL Request Examples](#curl-request-examples)
 - [Python Request Examples](#python-request-examples)
 
@@ -39,7 +46,8 @@ It is important to note that both generated photos are transparent (RGBA four-ch
 
 | Parameter Name | Type | Required | Description |
 | :--- | :--- | :--- | :--- |
-| input_image | file | Yes | input image file, with the image file needing to be an RGB three-channel image. |
+| input_image | file | Choose one of `input_image` or `input_image_base64` | The input image file, which needs to be an RGB three-channel image. |
+| input_image_base64 | str | Choose one of `input_image` or `input_image_base64` | The base64 encoding of the input image file, which needs to be an RGB three-channel image. |
 | height | int | No | The height of the standard ID photo, with a default value of `413`. |
 | width | int | No | The width of the standard ID photo, with a default value of `295`. |
 | human_matting_model | str | No | The human segmentation model, with a default value of `modnet_photographic_portrait_matting`. Available values are `modnet_photographic_portrait_matting`, `hivision_modnet`, `rmbg-1.4`, and `birefnet-v1-lite`. |
@@ -70,7 +78,8 @@ The logic of the `Add Background Color` API is to receive an RGBA image (transpa
 
 | Parameter Name | Type | Required | Description |
 | :--- | :--- | :--- | :--- |
-| input_image | file | Yes | the input image file, with the image file needing to be an RGBA four-channel image. |
+| input_image | file | Choose one of `input_image` or `input_image_base64` | The input image file, which needs to be an RGBA four-channel image. |
+| input_image_base64 | str | Choose one of `input_image` or `input_image_base64` | The base64 encoding of the input image file, which needs to be an RGBA four-channel image. |
 | color | str | Yes | The background color in hexadecimal format, e.g., `#000000` for black. |
 | kb | int | No | The target file size in KB. If the specified KB value is less than the original file, it adjusts the compression rate. If the specified KB value is greater than the source file, it increases the KB value by adding information to the file header, aiming for the final size of the image to match the specified KB value. |
 | render | int | No | The rendering mode, with a default value of `0`. Available values are `0`, `1`, and `2`. |
@@ -95,7 +104,8 @@ The logic of the `Generate Six-Inch Layout Photo` API is to receive an RGB image
 
 | Parameter Name | Type | Required | Description |
 | :--- | :--- | :--- | :--- |
-| input_image | file | Yes | the input image file, with the image file needing to be an RGB three-channel image. |
+| input_image | file | Choose one of `input_image` or `input_image_base64` | The input image file, which needs to be an RGB three-channel image. |
+| input_image_base64 | str | Choose one of `input_image` or `input_image_base64` | The base64 encoding of the input image file, which needs to be an RGB three-channel image. |
 | height | int | No | The height of the standard ID photo, with a default value of `413`. |
 | width | int | No | The width of the standard ID photo, with a default value of `295`. |
 | kb | int | No | The target file size in KB. If the specified KB value is less than the original file, it adjusts the compression rate. If the specified KB value is greater than the source file, it increases the KB value by adding information to the file header, aiming for the final size of the image to match the specified KB value. |
@@ -121,7 +131,8 @@ The logic of the `Human Matting` API is to receive an RGB image and output a sta
 
 | Parameter Name | Type | Required | Description |
 | :--- | :--- | :--- | :--- |
-| input_image | file | Yes | input image file, with the image file needing to be an RGB three-channel image. |
+| input_image | file | Choose one of `input_image` or `input_image_base64` | The input image file, which needs to be an RGB three-channel image. |
+| input_image_base64 | str | Choose one of `input_image` or `input_image_base64` | The base64 encoding of the input image file, which needs to be an RGB three-channel image. |
 | human_matting_model | str | No | The human segmentation model, with a default value of `modnet_photographic_portrait_matting`. Available values are `modnet_photographic_portrait_matting`, `hivision_modnet`, `rmbg-1.4`, and `birefnet-v1-lite`. |
 | dpi | int | No | The image resolution, with a default value of `300`. |
 
@@ -145,7 +156,8 @@ The functionality of the `Add Watermark to Image` API is to receive a watermark
 
 | Parameter Name | Type | Required | Description |
 | :--- | :--- | :--- | :--- |
-| input_image | file | Yes | input image file, with the image file needing to be an RGB three-channel image. |
+| input_image | file | Choose one of `input_image` or `input_image_base64` | The input image file, which needs to be an RGB three-channel image. |
+| input_image_base64 | str | Choose one of `input_image` or `input_image_base64` | The base64 encoding of the input image file, which needs to be an RGB three-channel image. |
 | text | str | Yes | The watermark text to be added. |
 | size | int | No | The size of the watermark text, with a default value of `20`. |
 | opacity | float | No | The opacity of the watermark text, with a default value of `0.5`. |
@@ -172,7 +184,8 @@ The functionality of the `Set Image KB Size` API is to receive an image and a ta
 
 | Parameter Name | Type | Required | Description |
 | :--- | :--- | :--- | :--- |
-| input_image | file | Yes | input image file, with the image file needing to be an RGB three-channel image. |
+| input_image | file | Choose one of `input_image` or `input_image_base64` | The input image file, which needs to be an RGB three-channel image. |
+| input_image_base64 | str | Choose one of `input_image` or `input_image_base64` | The base64 encoding of the input image file, which needs to be an RGB three-channel image. |
 | kb | int | Yes | The target file size in KB. |
 | dpi | int | No | The image resolution, with a default value of `300`. |
 
@@ -196,7 +209,8 @@ The functionality of the `ID Photo Cropping` API is to receive an RGBA image (tr
 
 | Parameter Name | Type | Required | Description |
 | :--- | :--- | :--- | :--- |
-| input_image | file | Yes | input image file, with the image file needing to be an RGB three-channel image. |
+| input_image | file | Choose one of `input_image` or `input_image_base64` | The input image file, which needs to be an RGBA four-channel image. |
+| input_image_base64 | str | Choose one of `input_image` or `input_image_base64` | The base64 encoding of the input image file, which needs to be an RGBA four-channel image. |
 | height | int | Yes | The height of the standard ID photo. |
 | width | int | Yes | The width of the standard ID photo. |
 | face_detect_model | str | No | The face detection model, with a default value of `mtcnn`. Available values are `mtcnn`, `face_plusplus`, and `retinaface-resnet50`. |
diff --git a/hivision/utils.py b/hivision/utils.py
index f73a17f7..97c030e1 100644
--- a/hivision/utils.py
+++ b/hivision/utils.py
@@ -175,9 +175,19 @@ def numpy_2_base64(img: np.ndarray) -> str:
 
 
 def base64_2_numpy(base64_image: str) -> np.ndarray:
-    img = base64.b64decode(base64_image)
-    img = np.frombuffer(img, np.uint8)
-
+    """Turn a base64 image string into an OpenCV-decoded NumPy array.
+
+    A leading ``data:image...`` header, if any, is dropped by keeping the
+    segment that follows the first comma; the rest is base64-decoded and
+    handed to ``cv2.imdecode`` with ``cv2.IMREAD_UNCHANGED``.
+    """
+    payload = base64_image
+    if payload.startswith('data:image'):
+        payload = payload.split(',')[1]
+
+    # base64 text -> raw bytes -> flat uint8 buffer -> decoded image
+    encoded = np.frombuffer(base64.b64decode(payload), dtype=np.uint8)
+    img = cv2.imdecode(encoded, cv2.IMREAD_UNCHANGED)
     return img
 
 # 字节流转base64