Merge pull request #65 from BitMind-AI/llm-annotation-moderation

Image Annotation Moderation and FLUX Model Integration
BitMind-AI · Oct 16, 2024 · 27576bc · 27576bc
2 parents 57f4599 + d2cf985
commit 27576bc
Show file tree

Hide file tree

Showing 28 changed files with 704 additions and 4,828 deletions.
diff --git a/autoupdate_validator_steps.sh b/autoupdate_validator_steps.sh
@@ -7,4 +7,5 @@
 echo $CONDA_PREFIX
 $CONDA_PREFIX/bin/pip install -e .
 $CONDA_PREFIX/bin/python bitmind/download_data.py
+$CONDA_PREFIX/bin/python bitmind/validator/verify_models.py
 echo "Autoupdate steps complete :)"
diff --git a/bitmind/constants.py b/bitmind/constants.py
@@ -1,12 +1,17 @@
 import os
+import torch
 
 
 WANDB_PROJECT = 'bitmind-subnet'
 WANDB_ENTITY = 'bitmindai'
 
 DATASET_META = {
     "real": [
-        {"path": "bitmind/bm-real"}
+        {"path": "bitmind/bm-real"},
+        {"path": "bitmind/open-images-v7"},
+        {"path": "bitmind/celeb-a-hq"},
+        {"path": "bitmind/ffhq-256"},
+        {"path": "bitmind/MS-COCO-unique-256"}
     ],
     "fake": [
         {"path": "bitmind/bm-realvisxl"},
@@ -48,19 +53,36 @@
         {
             "path": "stabilityai/stable-diffusion-xl-base-1.0",
             "use_safetensors": True,
+            "torch_dtype": torch.float16,
             "variant": "fp16",
             "pipeline": "StableDiffusionXLPipeline"
         },
         {
             "path": "SG161222/RealVisXL_V4.0",
             "use_safetensors": True,
+            "torch_dtype": torch.float16,
             "variant": "fp16",
             "pipeline": "StableDiffusionXLPipeline"
         },
         {
             "path": "Corcelio/mobius",
             "use_safetensors": True,
+            "torch_dtype": torch.float16,
             "pipeline": "StableDiffusionXLPipeline"
+        },
+        {
+            "path": 'black-forest-labs/FLUX.1-dev',
+            "use_safetensors": True,
+            "torch_dtype": torch.bfloat16,
+            "generate_args": {
+                "guidance_scale": 2,
+                "num_inference_steps": {"min": 50, "max": 125},
+                "generator": torch.Generator("cuda" if torch.cuda.is_available() else "cpu"),
+                "height": [512, 768],
+                "width": [512, 768]
+            },
+            "enable_cpu_offload": False,
+            "pipeline": "FluxPipeline"
         }
     ]
 }
@@ -69,16 +91,30 @@
 
 TARGET_IMAGE_SIZE = (256, 256)
 
-PROMPT_TYPES = ('random', 'annotation')
+PROMPT_TYPES = ('random', 'annotation', 'none')
 
 PROMPT_GENERATOR_ARGS = {
     m['model']: m for m in VALIDATOR_MODEL_META['prompt_generators']
 }
 
 PROMPT_GENERATOR_NAMES = list(PROMPT_GENERATOR_ARGS.keys())
 
+# args for .from_pretrained
 DIFFUSER_ARGS = {
-    m['path']: {k: v for k, v in m.items() if k != 'path' and k != 'pipeline'}  
+    m['path']: {
+        k: v for k, v in m.items()
+        if k not in ('path', 'pipeline', 'generate_args', 'enable_cpu_offload')
+    } for m in VALIDATOR_MODEL_META['diffusers']
+}
+
+GENERATE_ARGS = {
+    m['path']: m['generate_args']
+    for m in VALIDATOR_MODEL_META['diffusers']
+    if 'generate_args' in m
+}
+
+DIFFUSER_CPU_OFFLOAD_ENABLED = {
+    m['path']: m.get('enable_cpu_offload', False)
     for m in VALIDATOR_MODEL_META['diffusers']
 }
 
@@ -88,4 +124,6 @@
 
 DIFFUSER_NAMES = list(DIFFUSER_ARGS.keys())
 
-IMAGE_ANNOTATION_MODEL = "Salesforce/blip2-opt-2.7b-coco"
+IMAGE_ANNOTATION_MODEL = "Salesforce/blip2-opt-6.7b-coco"
+
+TEXT_MODERATION_MODEL = "unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit" 
diff --git a/bitmind/miner/__init__.py b/bitmind/miner/__init__.py
diff --git a/bitmind/protocol.py b/bitmind/protocol.py
@@ -26,12 +26,6 @@
 import base64
 import torch
 
-def b64_encode(image):
-    if isinstance(image, torch.Tensor):
-        image = transforms.ToPILImage()(image.cpu().detach())
-    image_bytes = BytesIO()
-    image.save(image_bytes, format="JPEG")
-    return base64.b64encode(image_bytes.getvalue())
 
 def prepare_image_synapse(image: Image):
     """
@@ -43,7 +37,12 @@ def prepare_image_synapse(image: Image):
     Returns:
         ImageSynapse: An instance of ImageSynapse containing the encoded image and a default prediction value.
     """
-    b64_encoded_image = b64_encode(image)
+    if isinstance(image, torch.Tensor):
+        image = transforms.ToPILImage()(image.cpu().detach())
+
+    image_bytes = BytesIO()
+    image.save(image_bytes, format="JPEG")
+    b64_encoded_image = base64.b64encode(image_bytes.getvalue())
     return ImageSynapse(image=b64_encoded_image)
 
 

diff --git a/bitmind/synthetic_image_generation/README.md b/bitmind/synthetic_image_generation/README.md
@@ -1,18 +1,4 @@
 
 # Synthetic Image Generation
 
-This folder contains files for the implementation of a joint vision-to-language and text-to-image model system that generates highly diverse and realistic images for deepfake detector training.
-
-**test_data/:**
-
-Default output directory for real-image-to-annotation and annotation-to-synthetic-image pipelines in the associated notebooks.
-
-Notebooks:
-
-**real_image_to_text_annotation.ipynb :**
-
-Pipeline for real image dataset to text caption dataset generation. Contains function that generates subdirectories of annotations for each real image dataset. Annotations are formatted as JSONs with captions (Strings) of images. The filename of the JSONs correspond to the image index in the associated dataset dictionary.
-
-**text_annotation_to_synthetic_image.ipynb :**
-
-Pipeline for text annotation to synthetic image dataset generation.
+This folder contains files for the implementation of a joint vision-to-language and text-to-image model system that generates highly diverse and realistic images for deepfake detector training and Subnet 34 validation.
diff --git a/bitmind/synthetic_image_generation/combine_datasets.py b/bitmind/synthetic_image_generation/combine_datasets.py