diff --git a/diffusers/scripts/exp_ldm_sd_gsa.sh b/diffusers/scripts/exp_ldm_sd_gsa.sh
new file mode 100755
index 0000000..88bf2df
--- /dev/null
+++ b/diffusers/scripts/exp_ldm_sd_gsa.sh
@@ -0,0 +1,4 @@
+python scripts/train_gsa.py --model-type sd --gsa-mode 1 --ckpt-path ../models/diffusers/stable-diffusion-v1-5/ --member-dataset laion-aesthetic-2-5k --holdout-dataset coco2017-val-2-5k --batch-size 10
+python scripts/train_gsa.py --model-type sd --gsa-mode 2 --ckpt-path ../models/diffusers/stable-diffusion-v1-5/ --member-dataset laion-aesthetic-2-5k --holdout-dataset coco2017-val-2-5k --batch-size 10
+python scripts/train_gsa.py --model-type ldm --gsa-mode 1 --ckpt-path ../models/diffusers/ldm-celebahq-256/ --member-dataset celeba-hq-2-5k --holdout-dataset ffhq-2-5k --batch-size 10
+python scripts/train_gsa.py --model-type ldm --gsa-mode 2 --ckpt-path ../models/diffusers/ldm-celebahq-256/ --member-dataset celeba-hq-2-5k --holdout-dataset ffhq-2-5k --batch-size 10
\ No newline at end of file
diff --git a/diffusers/scripts/exp_ldm_sd_gsa_demo.sh b/diffusers/scripts/exp_ldm_sd_gsa_demo.sh
index cc92acb..c06a970 100755
--- a/diffusers/scripts/exp_ldm_sd_gsa_demo.sh
+++ b/diffusers/scripts/exp_ldm_sd_gsa_demo.sh
@@ -1,4 +1,4 @@
-python scripts/train_gsa.py --model-type ldm --gsa-mode 1 --ckpt-path ../models/diffusers/ldm-celebahq-256/ --member-dataset celeba-hq-2-5k --holdout-dataset ffhq-2-5k --demo True
-python scripts/train_gsa.py --model-type ldm --gsa-mode 2 --ckpt-path ../models/diffusers/ldm-celebahq-256/ --member-dataset celeba-hq-2-5k --holdout-dataset ffhq-2-5k --demo True
 python scripts/train_gsa.py --model-type sd --gsa-mode 1 --ckpt-path ../models/diffusers/stable-diffusion-v1-5/ --member-dataset laion-aesthetic-2-5k --holdout-dataset coco2017-val-2-5k --demo True
-python scripts/train_gsa.py --model-type sd --gsa-mode 2 --ckpt-path ../models/diffusers/stable-diffusion-v1-5/ --member-dataset laion-aesthetic-2-5k --holdout-dataset coco2017-val-2-5k --demo True
\ No newline at end of file
+python scripts/train_gsa.py --model-type sd --gsa-mode 2 --ckpt-path ../models/diffusers/stable-diffusion-v1-5/ --member-dataset laion-aesthetic-2-5k --holdout-dataset coco2017-val-2-5k --demo True
+python scripts/train_gsa.py --model-type ldm --gsa-mode 1 --ckpt-path ../models/diffusers/ldm-celebahq-256/ --member-dataset celeba-hq-2-5k --holdout-dataset ffhq-2-5k --demo True
+python scripts/train_gsa.py --model-type ldm --gsa-mode 2 --ckpt-path ../models/diffusers/ldm-celebahq-256/ --member-dataset celeba-hq-2-5k --holdout-dataset ffhq-2-5k --demo True
\ No newline at end of file
diff --git a/diffusers/scripts/exp_ldm_sd_gsa_eval.sh b/diffusers/scripts/exp_ldm_sd_gsa_eval.sh
new file mode 100755
index 0000000..2aaa321
--- /dev/null
+++ b/diffusers/scripts/exp_ldm_sd_gsa_eval.sh
@@ -0,0 +1,4 @@
+python scripts/train_gsa.py --model-type sd --gsa-mode 1 --ckpt-path ../models/diffusers/stable-diffusion-v1-5/ --member-dataset laion-aesthetic-2-5k --holdout-dataset coco2017-val-2-5k --eval True --batch-size 10
+python scripts/train_gsa.py --model-type sd --gsa-mode 2 --ckpt-path ../models/diffusers/stable-diffusion-v1-5/ --member-dataset laion-aesthetic-2-5k --holdout-dataset coco2017-val-2-5k --eval True --batch-size 10
+python scripts/train_gsa.py --model-type ldm --gsa-mode 1 --ckpt-path ../models/diffusers/ldm-celebahq-256/ --member-dataset celeba-hq-2-5k --holdout-dataset ffhq-2-5k --eval True --batch-size 10
+python scripts/train_gsa.py --model-type ldm --gsa-mode 2 --ckpt-path ../models/diffusers/ldm-celebahq-256/ --member-dataset celeba-hq-2-5k --holdout-dataset ffhq-2-5k --eval True --batch-size 10
\ No newline at end of file
diff --git a/diffusers/scripts/exp_ldm_sd_gsa_eval_demo.sh b/diffusers/scripts/exp_ldm_sd_gsa_eval_demo.sh
new file mode 100755
index 0000000..536eac5
--- /dev/null
+++ b/diffusers/scripts/exp_ldm_sd_gsa_eval_demo.sh
@@ -0,0 +1,4 @@
+python scripts/train_gsa.py --model-type sd --gsa-mode 1 --ckpt-path ../models/diffusers/stable-diffusion-v1-5/ --member-dataset laion-aesthetic-2-5k --holdout-dataset coco2017-val-2-5k --demo True --eval True
+python scripts/train_gsa.py --model-type sd --gsa-mode 2 --ckpt-path ../models/diffusers/stable-diffusion-v1-5/ --member-dataset laion-aesthetic-2-5k --holdout-dataset coco2017-val-2-5k --demo True --eval True
+python scripts/train_gsa.py --model-type ldm --gsa-mode 1 --ckpt-path ../models/diffusers/ldm-celebahq-256/ --member-dataset celeba-hq-2-5k --holdout-dataset ffhq-2-5k --demo True --eval True
+python scripts/train_gsa.py --model-type ldm --gsa-mode 2 --ckpt-path ../models/diffusers/ldm-celebahq-256/ --member-dataset celeba-hq-2-5k --holdout-dataset ffhq-2-5k --demo True --eval True
\ No newline at end of file
diff --git a/diffusers/scripts/train_gsa.py b/diffusers/scripts/train_gsa.py
index c1b3278..ab8e929 100644
--- a/diffusers/scripts/train_gsa.py
+++ b/diffusers/scripts/train_gsa.py
@@ -9,9 +9,9 @@ import argparse
 import json,time
 
 from accelerate import Accelerator
+import pickle
 
-from xgboost import XGBClassifier
-from sklearn import metrics
+from sklearn.ensemble import RandomForestClassifier
 from sklearn import preprocessing
 
 from stable_copyright import GSALatentDiffusionPipeline, SecMIDDIMScheduler, GSAStableDiffusionPipeline
@@ -20,11 +20,11 @@ def load_pipeline(ckpt_path, device='cuda:0', model_type='sd'):
 
     if model_type == 'sd':
-        pipe = GSAStableDiffusionPipeline.from_pretrained(ckpt_path, torch_dtype=torch.float32)
+        pipe = GSAStableDiffusionPipeline.from_pretrained(ckpt_path, torch_dtype=torch.float16)
         pipe.scheduler = SecMIDDIMScheduler.from_config(pipe.scheduler.config)
         pipe = pipe.to(device)
     elif model_type == 'ldm':
-        pipe = GSALatentDiffusionPipeline.from_pretrained(ckpt_path, torch_dtype=torch.float32)
+        pipe = GSALatentDiffusionPipeline.from_pretrained(ckpt_path, torch_dtype=torch.float16)
         # pipe.scheduler = SecMIDDIMScheduler.from_config(pipe.scheduler.config)
     elif model_type == 'sdxl':
         raise NotImplementedError('SDXL not implemented yet')
 
@@ -45,7 +45,7 @@ def get_reverse_denoise_results(pipe, dataloader, device, gsa_mode, demo):
         pipe, optimizer, dataloader
     )
 
-    weight_dtype = torch.float32
+    weight_dtype = torch.float16
     features, path_log = [], []
     for batch_idx, batch in enumerate(tqdm.tqdm(dataloader)):
         path_log.extend(batch['path'])
@@ -61,7 +61,7 @@ def get_reverse_denoise_results(pipe, dataloader, device, gsa_mode, demo):
         for feature in gsa_features:
             features.append(feature.detach().clone().cpu())
 
-        if demo and batch_idx > 0:
+        if demo and batch_idx > 9:
             break
 
     return torch.stack(features, dim=0), path_log
@@ -85,25 +85,24 @@ def preprocess(member, non_member):
 def train_xgboost(member_features, nonmember_features):
     x, y = preprocess(member_features, nonmember_features)
     # print(x, y)
-    xgb = XGBClassifier(n_estimators=1000)
+    xgb = RandomForestClassifier(max_depth=20, random_state=0)
     xgb.fit(x, y)
 
     y_pred = xgb.predict(x)
     # print(np.isnan(x).any())
-    print(x, y, y_pred, x.min(), x.max(), x.shape)
-    member_scores = torch.tensor(y_pred[y >= 0.5])
-    nonmember_scores = torch.tensor(y_pred[y < 0.5])
+    member_scores = torch.tensor(y_pred[y <= 0.5])
+    nonmember_scores = torch.tensor(y_pred[y > 0.5])
 
     return xgb, member_scores, nonmember_scores
 
 def test_xgboost(xgb_save_path, member_features, nonmember_features):
     x, y = preprocess(member_features, nonmember_features)
-    
-    xgb = XGBClassifier(n_estimators=200)
-    xgb.load_model(xgb_save_path)
+
+    with open(xgb_save_path, 'rb') as f:
+        xgb = pickle.load(f)
 
     y_pred = xgb.predict(x)
-    member_scores = torch.tensor(y_pred[y >= 0.5])
-    nonmember_scores = torch.tensor(y_pred[y < 0.5])
+    member_scores = torch.tensor(y_pred[y <= 0.5])
+    nonmember_scores = torch.tensor(y_pred[y > 0.5])
 
     return member_scores, nonmember_scores
@@ -136,7 +135,8 @@ def main(args):
 
         # train a xgboost
         xgb, member_scores, nonmember_scores = train_xgboost(member_features, nonmember_features)
-        xgb.save_model(args.output + f'xgboost_gsa_{args.gsa_mode}_{args.model_type}.bin')
+        with open(args.output + f'xgboost_gsa_{args.gsa_mode}_{args.model_type}.bin', 'wb') as f:
+            pickle.dump(xgb, f)
 
         benchmark(member_scores, nonmember_scores, f'gsa_{args.gsa_mode}_{args.model_type}_score', args.output)
 
@@ -167,7 +167,7 @@ def main(args):
         TPR = TP / (TP + FN)
         FPR = FP / (FP + TN)
 
-        extra_output = dict(TPR=TPR, FPR=FPR)
+        extra_output = dict(TPR=TPR.item(), FPR=FPR.item())
 
         with open(args.output + f'gsa_{args.gsa_mode}_{args.model_type}_score' + '_extra.json', 'w') as file:
             json.dump(extra_output, file, indent=4)
diff --git a/diffusers/stable_copyright/data_utils.py b/diffusers/stable_copyright/data_utils.py
index c805333..0691117 100644
--- a/diffusers/stable_copyright/data_utils.py
+++ b/diffusers/stable_copyright/data_utils.py
@@ -77,7 +77,7 @@ def test(member_scores, nonmember_scores, experiment, output_path, threshold_pat
         json.dump(output, file, indent=4)
 
 def benchmark(member_scores, nonmember_scores, experiment, output_path):
-    
+    # print(member_scores, nonmember_scores)
     min_score = min(member_scores.min(), nonmember_scores.min())
     max_score = max(member_scores.max(), nonmember_scores.max())
 
diff --git a/diffusers/stable_copyright/gsa_pipeline_stable_diffusion.py b/diffusers/stable_copyright/gsa_pipeline_stable_diffusion.py
index 8c1ce11..dc6035c 100644
--- a/diffusers/stable_copyright/gsa_pipeline_stable_diffusion.py
+++ b/diffusers/stable_copyright/gsa_pipeline_stable_diffusion.py
@@ -374,20 +374,11 @@ def __call__(
         else:
             raise NotImplementedError(f"Mode {gsa_mode} out of 1 and 2")
 
-        if not output_type == "latent":
-            image = self.vae.decode(latents / self.vae.config.scaling_factor, return_dict=False, generator=generator)[
-                0
-            ]
-        else:
-            image = latents
-
-        do_denormalize = [True] * image.shape[0]
-        image = self.image_processor.postprocess(image, output_type=output_type, do_denormalize=do_denormalize)
 
         # Offload all models
         self.maybe_free_model_hooks()
 
         if not return_dict:
-            return (image, gsa_features)
+            return (None, gsa_features)
 
-        return GSAStableDiffusionPipelineOutput(images=image, gsa_features=gsa_features)
\ No newline at end of file
+        return GSAStableDiffusionPipelineOutput(images=None, gsa_features=gsa_features)
\ No newline at end of file
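
Note (not part of the patch): a minimal, self-contained sketch of the pickle round-trip this diff switches to, i.e. saving the `RandomForestClassifier` the way `main()` now does and reloading it the way `test_xgboost()` now does. The file name, array shapes, and data below are made-up placeholders, not values from the repository.

```python
import pickle

import numpy as np
from sklearn.ensemble import RandomForestClassifier

# Stand-ins for the preprocessed GSA features and member/holdout labels
# produced by preprocess(); shapes are arbitrary, for illustration only.
x = np.random.rand(200, 32).astype(np.float32)
y = np.concatenate([np.zeros(100), np.ones(100)])

clf = RandomForestClassifier(max_depth=20, random_state=0)
clf.fit(x, y)

# Save with pickle.dump, mirroring the change in main()
with open('xgboost_gsa_demo.bin', 'wb') as f:
    pickle.dump(clf, f)

# Reload with pickle.load, mirroring the change in test_xgboost()
with open('xgboost_gsa_demo.bin', 'rb') as f:
    clf_loaded = pickle.load(f)

print(clf_loaded.predict(x[:5]))  # hard 0/1 membership predictions
```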