diff --git a/extras/BLIP/models/blip.py b/extras/BLIP/models/blip.py index a2566331d..0f8c1d270 100644 --- a/extras/BLIP/models/blip.py +++ b/extras/BLIP/models/blip.py @@ -216,9 +216,9 @@ def is_url(url_or_filename): def load_checkpoint(model,url_or_filename): if is_url(url_or_filename): cached_file = download_cached_file(url_or_filename, check_hash=False, progress=True) - checkpoint = torch.load(cached_file, map_location='cpu') + checkpoint = torch.load(cached_file, map_location='cpu', weights_only=True) elif os.path.isfile(url_or_filename): - checkpoint = torch.load(url_or_filename, map_location='cpu') + checkpoint = torch.load(url_or_filename, map_location='cpu', weights_only=True) else: raise RuntimeError('checkpoint url or path is invalid') diff --git a/extras/BLIP/models/blip_nlvr.py b/extras/BLIP/models/blip_nlvr.py index 0eb9eaa69..159d03146 100644 --- a/extras/BLIP/models/blip_nlvr.py +++ b/extras/BLIP/models/blip_nlvr.py @@ -78,9 +78,9 @@ def blip_nlvr(pretrained='',**kwargs): def load_checkpoint(model,url_or_filename): if is_url(url_or_filename): cached_file = download_cached_file(url_or_filename, check_hash=False, progress=True) - checkpoint = torch.load(cached_file, map_location='cpu') + checkpoint = torch.load(cached_file, map_location='cpu', weights_only=True) elif os.path.isfile(url_or_filename): - checkpoint = torch.load(url_or_filename, map_location='cpu') + checkpoint = torch.load(url_or_filename, map_location='cpu', weights_only=True) else: raise RuntimeError('checkpoint url or path is invalid') state_dict = checkpoint['model'] diff --git a/extras/facexlib/detection/__init__.py b/extras/facexlib/detection/__init__.py index 4e52fd74e..439138400 100644 --- a/extras/facexlib/detection/__init__.py +++ b/extras/facexlib/detection/__init__.py @@ -19,7 +19,7 @@ def init_detection_model(model_name, half=False, device='cuda', model_rootpath=N url=model_url, model_dir='facexlib/weights', progress=True, file_name=None, save_dir=model_rootpath) # TODO: 
clean pretrained model - load_net = torch.load(model_path, map_location=lambda storage, loc: storage) + load_net = torch.load(model_path, map_location=lambda storage, loc: storage, weights_only=True) # remove unnecessary 'module.' for k, v in deepcopy(load_net).items(): if k.startswith('module.'): diff --git a/extras/facexlib/parsing/__init__.py b/extras/facexlib/parsing/__init__.py index 8b4758bdd..ed5009c09 100644 --- a/extras/facexlib/parsing/__init__.py +++ b/extras/facexlib/parsing/__init__.py @@ -17,7 +17,7 @@ def init_parsing_model(model_name='bisenet', half=False, device='cuda', model_ro model_path = load_file_from_url( url=model_url, model_dir='facexlib/weights', progress=True, file_name=None, save_dir=model_rootpath) - load_net = torch.load(model_path, map_location=lambda storage, loc: storage) + load_net = torch.load(model_path, map_location=lambda storage, loc: storage, weights_only=True) model.load_state_dict(load_net, strict=True) model.eval() model = model.to(device) diff --git a/extras/ip_adapter.py b/extras/ip_adapter.py index 22527d244..d29f1de25 100644 --- a/extras/ip_adapter.py +++ b/extras/ip_adapter.py @@ -104,7 +104,7 @@ def load_ip_adapter(clip_vision_path, ip_negative_path, ip_adapter_path): offload_device = torch.device('cpu') use_fp16 = model_management.should_use_fp16(device=load_device) - ip_state_dict = torch.load(ip_adapter_path, map_location="cpu") + ip_state_dict = torch.load(ip_adapter_path, map_location="cpu", weights_only=True) plus = "latents" in ip_state_dict["image_proj"] cross_attention_dim = ip_state_dict["ip_adapter"]["1.to_k_ip.weight"].shape[1] sdxl = cross_attention_dim == 2048 diff --git a/fooocus_version.py b/fooocus_version.py index ca4825b5b..326513ef1 100644 --- a/fooocus_version.py +++ b/fooocus_version.py @@ -1 +1 @@ -version = '2.5.2' +version = '2.5.3' diff --git a/language/en.json b/language/en.json index a0935643a..7e8b27daa 100644 --- a/language/en.json +++ b/language/en.json @@ -17,6 +17,7 @@ "Content Type": 
"Content Type", "Photograph": "Photograph", "Art/Anime": "Art/Anime", + "Apply Styles": "Apply Styles", "Describe this Image into Prompt": "Describe this Image into Prompt", "Image Size and Recommended Size": "Image Size and Recommended Size", "Upscale or Variation:": "Upscale or Variation:", diff --git a/ldm_patched/ldm/modules/encoders/noise_aug_modules.py b/ldm_patched/ldm/modules/encoders/noise_aug_modules.py index a5d866030..d8588d4be 100644 --- a/ldm_patched/ldm/modules/encoders/noise_aug_modules.py +++ b/ldm_patched/ldm/modules/encoders/noise_aug_modules.py @@ -8,7 +8,7 @@ def __init__(self, *args, clip_stats_path=None, timestep_dim=256, **kwargs): if clip_stats_path is None: clip_mean, clip_std = torch.zeros(timestep_dim), torch.ones(timestep_dim) else: - clip_mean, clip_std = torch.load(clip_stats_path, map_location="cpu") + clip_mean, clip_std = torch.load(clip_stats_path, map_location="cpu", weights_only=True) self.register_buffer("data_mean", clip_mean[None, :], persistent=False) self.register_buffer("data_std", clip_std[None, :], persistent=False) self.time_embed = Timestep(timestep_dim) diff --git a/ldm_patched/modules/sd1_clip.py b/ldm_patched/modules/sd1_clip.py index 3727fb482..38579cf4c 100644 --- a/ldm_patched/modules/sd1_clip.py +++ b/ldm_patched/modules/sd1_clip.py @@ -326,7 +326,7 @@ def load_embed(embedding_name, embedding_directory, embedding_size, embed_key=No except: embed_out = safe_load_embed_zip(embed_path) else: - embed = torch.load(embed_path, map_location="cpu") + embed = torch.load(embed_path, map_location="cpu", weights_only=True) except Exception as e: print(traceback.format_exc()) print() diff --git a/ldm_patched/pfn/architecture/face/codeformer.py b/ldm_patched/pfn/architecture/face/codeformer.py index a0e2e985e..1ed4602bd 100644 --- a/ldm_patched/pfn/architecture/face/codeformer.py +++ b/ldm_patched/pfn/architecture/face/codeformer.py @@ -377,15 +377,15 @@ def __init__( ) if model_path is not None: - chkpt = 
torch.load(model_path, map_location="cpu") + chkpt = torch.load(model_path, map_location="cpu", weights_only=True) if "params_ema" in chkpt: self.load_state_dict( - torch.load(model_path, map_location="cpu")["params_ema"] + torch.load(model_path, map_location="cpu", weights_only=True)["params_ema"] ) logger.info(f"vqgan is loaded from: {model_path} [params_ema]") elif "params" in chkpt: self.load_state_dict( - torch.load(model_path, map_location="cpu")["params"] + torch.load(model_path, map_location="cpu", weights_only=True)["params"] ) logger.info(f"vqgan is loaded from: {model_path} [params]") else: diff --git a/ldm_patched/pfn/architecture/face/gfpgan_bilinear_arch.py b/ldm_patched/pfn/architecture/face/gfpgan_bilinear_arch.py index b6e820e00..4df2bc3d6 100644 --- a/ldm_patched/pfn/architecture/face/gfpgan_bilinear_arch.py +++ b/ldm_patched/pfn/architecture/face/gfpgan_bilinear_arch.py @@ -273,8 +273,8 @@ def __init__( if decoder_load_path: self.stylegan_decoder.load_state_dict( torch.load( - decoder_load_path, map_location=lambda storage, loc: storage - )["params_ema"] + decoder_load_path, map_location=lambda storage, loc: storage, + weights_only=True)["params_ema"] ) # fix decoder without updating params if fix_decoder: diff --git a/ldm_patched/pfn/architecture/face/gfpganv1_arch.py b/ldm_patched/pfn/architecture/face/gfpganv1_arch.py index 72d72fc86..dcdeaf9f0 100644 --- a/ldm_patched/pfn/architecture/face/gfpganv1_arch.py +++ b/ldm_patched/pfn/architecture/face/gfpganv1_arch.py @@ -373,8 +373,8 @@ def __init__( if decoder_load_path: self.stylegan_decoder.load_state_dict( torch.load( - decoder_load_path, map_location=lambda storage, loc: storage - )["params_ema"] + decoder_load_path, map_location=lambda storage, loc: storage, + weights_only=True)["params_ema"] ) # fix decoder without updating params if fix_decoder: diff --git a/ldm_patched/pfn/architecture/face/gfpganv1_clean_arch.py b/ldm_patched/pfn/architecture/face/gfpganv1_clean_arch.py index 
16470d634..475b982f0 100644 --- a/ldm_patched/pfn/architecture/face/gfpganv1_clean_arch.py +++ b/ldm_patched/pfn/architecture/face/gfpganv1_clean_arch.py @@ -284,8 +284,8 @@ def __init__( if decoder_load_path: self.stylegan_decoder.load_state_dict( torch.load( - decoder_load_path, map_location=lambda storage, loc: storage - )["params_ema"] + decoder_load_path, map_location=lambda storage, loc: storage, + weights_only=True)["params_ema"] ) # fix decoder without updating params if fix_decoder: diff --git a/modules/config.py b/modules/config.py index 0f2038b61..001cf949d 100644 --- a/modules/config.py +++ b/modules/config.py @@ -702,6 +702,19 @@ def init_temp_path(path: str | None, default_path: str) -> str: expected_type=str ) +default_describe_apply_prompts_checkbox = get_config_item_or_set_default( + key='default_describe_apply_prompts_checkbox', + default_value=True, + validator=lambda x: isinstance(x, bool), + expected_type=bool +) +default_describe_content_type = get_config_item_or_set_default( + key='default_describe_content_type', + default_value=[modules.flags.describe_type_photo], + validator=lambda x: all(k in modules.flags.describe_types for k in x), + expected_type=list +) + config_dict["default_loras"] = default_loras = default_loras[:default_max_lora_number] + [[True, 'None', 1.0] for _ in range(default_max_lora_number - len(default_loras))] # mapping config to meta parameter diff --git a/modules/core.py b/modules/core.py index 78c897592..1c3dacb99 100644 --- a/modules/core.py +++ b/modules/core.py @@ -231,7 +231,7 @@ def get_previewer(model): if vae_approx_filename in VAE_approx_models: VAE_approx_model = VAE_approx_models[vae_approx_filename] else: - sd = torch.load(vae_approx_filename, map_location='cpu') + sd = torch.load(vae_approx_filename, map_location='cpu', weights_only=True) VAE_approx_model = VAEApprox() VAE_approx_model.load_state_dict(sd) del sd diff --git a/modules/flags.py b/modules/flags.py index 4357cdf11..05c29a232 100644 --- 
a/modules/flags.py +++ b/modules/flags.py @@ -96,6 +96,7 @@ describe_type_photo = 'Photograph' describe_type_anime = 'Art/Anime' +describe_types = [describe_type_photo, describe_type_anime] sdxl_aspect_ratios = [ '704*1408', '704*1344', '768*1344', '768*1280', '832*1216', '832*1152', diff --git a/modules/inpaint_worker.py b/modules/inpaint_worker.py index 43a7ae23e..88a78a6d6 100644 --- a/modules/inpaint_worker.py +++ b/modules/inpaint_worker.py @@ -196,7 +196,7 @@ def patch(self, inpaint_head_model_path, inpaint_latent, inpaint_latent_mask, mo if inpaint_head_model is None: inpaint_head_model = InpaintHead() - sd = torch.load(inpaint_head_model_path, map_location='cpu') + sd = torch.load(inpaint_head_model_path, map_location='cpu', weights_only=True) inpaint_head_model.load_state_dict(sd) feed = torch.cat([ diff --git a/modules/upscaler.py b/modules/upscaler.py index de9a143c5..222da1ee3 100644 --- a/modules/upscaler.py +++ b/modules/upscaler.py @@ -17,7 +17,7 @@ def perform_upscale(img): if model is None: model_filename = downloading_upscale_model() - sd = torch.load(model_filename) + sd = torch.load(model_filename, weights_only=True) sdo = OrderedDict() for k, v in sd.items(): sdo[k.replace('residual_block_', 'RDB')] = v diff --git a/update_log.md b/update_log.md index e3a5e273d..8dbd653e6 100644 --- a/update_log.md +++ b/update_log.md @@ -1,3 +1,8 @@ +# [2.5.3](https://github.com/lllyasviel/Fooocus/releases/tag/v2.5.3) + +* Load non-safetensors files in weights-only mode (torch.load with weights_only=True), preventing harmful code injection +* Add checkbox for applying/resetting styles when describing images, also allowing multiple describe content types + # [2.5.2](https://github.com/lllyasviel/Fooocus/releases/tag/v2.5.2) * Fix not adding positive prompt when styles didn't have a {prompt} placeholder in the positive prompt diff --git a/webui.py b/webui.py index 05d9e222e..31f2d4cda 100644 --- a/webui.py +++ b/webui.py @@ -341,10 +341,11 @@ def generate_mask(image, mask_model, cloth_category,
dino_prompt_text, sam_model with gr.Column(): describe_input_image = grh.Image(label='Image', source='upload', type='numpy', show_label=False) with gr.Column(): - describe_method = gr.Radio( + describe_methods = gr.CheckboxGroup( label='Content Type', - choices=[flags.describe_type_photo, flags.describe_type_anime], - value=flags.describe_type_photo) + choices=flags.describe_types, + value=modules.config.default_describe_content_type) + describe_apply_styles = gr.Checkbox(label='Apply Styles', value=modules.config.default_describe_apply_prompts_checkbox) describe_btn = gr.Button(value='Describe this Image into Prompt') describe_image_size = gr.Textbox(label='Image Size and Recommended Size', elem_id='describe_image_size', visible=False) gr.HTML('\U0001F4D4 Documentation') @@ -1064,30 +1065,54 @@ def trigger_metadata_import(file, state_is_generating): gr.Audio(interactive=False, value=notification_file, elem_id='audio_notification', visible=False) break - def trigger_describe(mode, img): - if mode == flags.describe_type_photo: + def trigger_describe(modes, img, apply_styles): + describe_prompts = [] + styles = set() + + if flags.describe_type_photo in modes: from extras.interrogate import default_interrogator as default_interrogator_photo - return default_interrogator_photo(img), ["Fooocus V2", "Fooocus Enhance", "Fooocus Sharp"] - if mode == flags.describe_type_anime: + describe_prompts.append(default_interrogator_photo(img)) + styles.update(["Fooocus V2", "Fooocus Enhance", "Fooocus Sharp"]) + + if flags.describe_type_anime in modes: from extras.wd14tagger import default_interrogator as default_interrogator_anime - return default_interrogator_anime(img), ["Fooocus V2", "Fooocus Masterpiece"] - return mode, ["Fooocus V2"] + describe_prompts.append(default_interrogator_anime(img)) + styles.update(["Fooocus V2", "Fooocus Masterpiece"]) - describe_btn.click(trigger_describe, inputs=[describe_method, describe_input_image], - outputs=[prompt, style_selections], 
show_progress=True, queue=True) + if len(styles) == 0 or not apply_styles: + styles = gr.update() + else: + styles = list(styles) + + if len(describe_prompts) == 0: + describe_prompt = gr.update() + else: + describe_prompt = ', '.join(describe_prompts) + + return describe_prompt, styles + + describe_btn.click(trigger_describe, inputs=[describe_methods, describe_input_image, describe_apply_styles], + outputs=[prompt, style_selections], show_progress=True, queue=True) \ + .then(fn=style_sorter.sort_styles, inputs=style_selections, outputs=style_selections, queue=False, show_progress=False) \ + .then(lambda: None, _js='()=>{refresh_style_localization();}') if args_manager.args.enable_auto_describe_image: - def trigger_auto_describe(mode, img, prompt): + def trigger_auto_describe(mode, img, prompt, apply_styles): # keep prompt if not empty if prompt == '': - return trigger_describe(mode, img) + return trigger_describe(mode, img, apply_styles) return gr.update(), gr.update() - uov_input_image.upload(trigger_auto_describe, inputs=[describe_method, uov_input_image, prompt], - outputs=[prompt, style_selections], show_progress=True, queue=True) + uov_input_image.upload(trigger_auto_describe, inputs=[describe_methods, uov_input_image, prompt, describe_apply_styles], + outputs=[prompt, style_selections], show_progress=True, queue=True) \ + .then(fn=style_sorter.sort_styles, inputs=style_selections, outputs=style_selections, queue=False, show_progress=False) \ + .then(lambda: None, _js='()=>{refresh_style_localization();}') enhance_input_image.upload(lambda: gr.update(value=True), outputs=enhance_checkbox, queue=False, show_progress=False) \ - .then(trigger_auto_describe, inputs=[describe_method, enhance_input_image, prompt], outputs=[prompt, style_selections], show_progress=True, queue=True) + .then(trigger_auto_describe, inputs=[describe_methods, enhance_input_image, prompt, describe_apply_styles], + outputs=[prompt, style_selections], show_progress=True, queue=True) \ + 
.then(fn=style_sorter.sort_styles, inputs=style_selections, outputs=style_selections, queue=False, show_progress=False) \ + .then(lambda: None, _js='()=>{refresh_style_localization();}') def dump_default_english_config(): from modules.localization import dump_english_config