diff --git a/docs/source/_static/openapi.json b/docs/source/_static/openapi.json index bbc50acc3..5646cb8c4 100644 --- a/docs/source/_static/openapi.json +++ b/docs/source/_static/openapi.json @@ -1 +1 @@ -{"openapi":"3.1.0","info":{"title":"JoliGEN server","description":"*commit:* [7fcf7906](https://github.com/jolibrain/joliGEN/commit/7fcf790661e64d458238c2ac68f440098caa0bae)\n\nThis is the JoliGEN server API documentation.\n","version":"0.1.0"},"paths":{"/train/{name}":{"get":{"summary":"Get the status of a training process","operationId":"get_train_train__name__get","parameters":[{"required":true,"schema":{"type":"string","title":"Name"},"name":"name","in":"path"}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"post":{"summary":"Start a training process with given name.","description":"The training process will be created using the same options as command line","operationId":"train_train__name__post","parameters":[{"required":true,"schema":{"type":"string","title":"Name"},"name":"name","in":"path"}],"requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/TrainOptions"}}}},"responses":{"201":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"delete":{"summary":"Delete a training process.","description":"If the process is running, it will be stopped.","operationId":"delete_train_train__name__delete","parameters":[{"required":true,"schema":{"type":"string","title":"Name"},"name":"name","in":"path"}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation 
Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/train":{"get":{"summary":"Get the status of all training processes","operationId":"get_train_processes_train_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}}}}},"/info":{"get":{"summary":"Get the server status","operationId":"get_info_info_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}}}}},"/fs/":{"delete":{"summary":"Delete a file or a directory in the filesystem","description":"This endpoint can be dangerous, use it with extreme caution","operationId":"delete_path_fs__delete","parameters":[{"required":true,"schema":{"type":"string","title":"Path"},"name":"path","in":"query"}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}}},"components":{"schemas":{"HTTPValidationError":{"properties":{"detail":{"items":{"$ref":"#/components/schemas/ValidationError"},"type":"array","title":"Detail"}},"type":"object","title":"HTTPValidationError"},"ValidationError":{"properties":{"loc":{"items":{"anyOf":[{"type":"string"},{"type":"integer"}]},"type":"array","title":"Location"},"msg":{"type":"string","title":"Message"},"type":{"type":"string","title":"Error Type"}},"type":"object","required":["loc","msg","type"],"title":"ValidationError"},"TrainOptions":{"title":"TrainBody","type":"object","properties":{"server":{"title":"Server","default":{"sync":false},"allOf":[{"$ref":"#/definitions/ServerTrainOptions"}]},"train_options":{"title":"TrainOptions","type":"object","properties":{"D":{"title":"Discriminator","type":"object","properties":{"dropout":{"default":false,"type":"boolean","description":"whether to use dropout in the 
discriminator"},"n_layers":{"default":3,"type":"integer","description":"only used if netD==n_layers"},"ndf":{"default":64,"type":"integer","description":"\\# of discrim filters in the first conv layer"},"netDs":{"default":["projected_d","basic"],"type":"array","items":{"enum":null,"type":"string"},"description":"specify discriminator architecture, another option, --D_n_layers allows you to specify the layers in the n_layers discriminator. NB: duplicated arguments are ignored. Values: basic, n_layers, pixel, projected_d, temporal, vision_aided, depth, mask, sam"},"no_antialias":{"default":false,"type":"boolean","description":"if specified, use stride=2 convs instead of antialiased-downsampling (sad)"},"no_antialias_up":{"default":false,"type":"boolean","description":"if specified, use [upconv(learned filter)] instead of [upconv(hard-coded [1,3,3,1] filter), conv]"},"norm":{"default":"instance","type":"string","description":"instance normalization or batch normalization for D","enum":["instance","batch","none"]},"proj_config_segformer":{"default":"models/configs/segformer/segformer_config_b0.json","type":"string","description":"path to segformer configuration file"},"proj_interp":{"default":-1,"type":"integer","description":"whether to force projected discriminator interpolation to a value \\> 224, -1 means no interpolation"},"proj_network_type":{"default":"efficientnet","type":"string","description":"projected discriminator architecture","enum":["efficientnet","segformer","vitbase","vitsmall","vitsmall2","vitclip16","vitclip14","depth","dinov2_vits14","dinov2_vitb14","dinov2_vitl14","dinov2_vitg14","dinov2_vits14_reg","dinov2_vitb14_reg","dinov2_vitl14_reg","dinov2_vitg14_reg"]},"proj_weight_segformer":{"default":"models/configs/segformer/pretrain/segformer_mit-b0.pth","type":"string","description":"path to segformer weight"},"spectral":{"default":false,"type":"boolean","description":"whether to use spectral norm in the 
discriminator"},"temporal_every":{"default":4,"type":"integer","description":"apply temporal discriminator every x steps"},"vision_aided_backbones":{"default":"clip+dino+swin","type":"string","description":"specify vision aided discriminators architectures, they are frozen then output are combined and fitted with a linear network on top, choose from dino, clip, swin, det_coco, seg_ade and combine them with +"},"weight_sam":{"default":"","type":"string","description":"path to sam weight for D, e.g. models/configs/sam/pretrain/sam_vit_b_01ec64.pth, or models/configs/sam/pretrain/mobile_sam.pt for MobileSAM"}}},"G":{"title":"Generator","type":"object","properties":{"attn_nb_mask_attn":{"default":10,"type":"integer","description":"number of attention masks in _attn model architectures"},"attn_nb_mask_input":{"default":1,"type":"integer","description":"number of mask dedicated to input in _attn model architectures"},"backward_compatibility_twice_resnet_blocks":{"default":false,"type":"boolean","description":"if true, feats will go througt resnet blocks two times for resnet_attn generators. 
This option will be deleted, it's for backward compatibility (old models were trained that way)."},"config_segformer":{"default":"models/configs/segformer/segformer_config_b0.json","type":"string","description":"path to segformer configuration file for G"},"diff_n_timestep_test":{"default":1000,"type":"integer","description":"Number of timesteps used for UNET mha inference (test time)."},"diff_n_timestep_train":{"default":2000,"type":"integer","description":"Number of timesteps used for UNET mha training."},"dropout":{"default":false,"type":"boolean","description":"dropout for the generator"},"nblocks":{"default":9,"type":"integer","description":"\\# of layer blocks in G, applicable to resnets"},"netE":{"default":"resnet_256","type":"string","description":"specify multimodal latent vector encoder","enum":["resnet_128","resnet_256","resnet_512","conv_128","conv_256","conv_512"]},"netG":{"default":"mobile_resnet_attn","type":"string","description":"specify generator architecture","enum":["resnet","resnet_attn","mobile_resnet","mobile_resnet_attn","unet_256","unet_128","segformer_attn_conv","segformer_conv","ittr","unet_mha","uvit","unet_mha_ref_attn"]},"ngf":{"default":64,"type":"integer","description":"\\# of gen filters in the last conv layer"},"norm":{"default":"instance","type":"string","description":"instance normalization or batch normalization for G","enum":["instance","batch","none"]},"padding_type":{"default":"reflect","type":"string","description":"whether to use padding in the generator","enum":["reflect","replicate","zeros"]},"spectral":{"default":false,"type":"boolean","description":"whether to use spectral norm in the generator"},"unet_mha_attn_res":{"default":[16],"type":"array","items":{"enum":null,"type":"string"},"description":"downrate samples at which attention takes place"},"unet_mha_channel_mults":{"default":[1,2,4,8],"type":"array","items":{"enum":null,"type":"string"},"description":"channel multiplier for each level of the UNET 
mha"},"unet_mha_group_norm_size":{"default":32,"type":"integer","description":""},"unet_mha_norm_layer":{"default":"groupnorm","type":"string","description":"","enum":["groupnorm","batchnorm","layernorm","instancenorm","switchablenorm"]},"unet_mha_num_head_channels":{"default":32,"type":"integer","description":"number of channels in each head of the mha architecture"},"unet_mha_num_heads":{"default":1,"type":"integer","description":"number of heads in the mha architecture"},"unet_mha_res_blocks":{"default":[2,2,2,2],"type":"array","items":{"enum":null,"type":"string"},"description":"distribution of resnet blocks across the UNet stages, should have same size as --G_unet_mha_channel_mults"},"unet_mha_vit_efficient":{"default":false,"type":"boolean","description":"if true, use efficient attention in UNet and UViT"},"uvit_num_transformer_blocks":{"default":6,"type":"integer","description":"Number of transformer blocks in UViT"}}},"alg":{"title":"Algorithm-specific","type":"object","properties":{"gan":{"title":"GAN model","type":"object","properties":{"lambda":{"default":1.0,"type":"number","description":"weight for GAN loss:GAN(G(X))"}}},"cut":{"title":"CUT model","type":"object","properties":{"HDCE_gamma":{"default":1.0,"type":"number","description":""},"HDCE_gamma_min":{"default":1.0,"type":"number","description":""},"MSE_idt":{"default":false,"type":"boolean","description":"use MSENCE loss for identity mapping: MSE(G(Y), Y))"},"flip_equivariance":{"default":false,"type":"boolean","description":"Enforce flip-equivariance as additional regularization. 
It's used by FastCUT, but not CUT"},"lambda_MSE_idt":{"default":1.0,"type":"number","description":"weight for MSE identity loss: MSE(G(X), X)"},"lambda_NCE":{"default":1.0,"type":"number","description":"weight for NCE loss: NCE(G(X), X)"},"lambda_SRC":{"default":0.0,"type":"number","description":"weight for SRC (semantic relation consistency) loss: NCE(G(X), X)"},"nce_T":{"default":0.07,"type":"number","description":"temperature for NCE loss"},"nce_idt":{"default":true,"type":"boolean","description":"use NCE loss for identity mapping: NCE(G(Y), Y))"},"nce_includes_all_negatives_from_minibatch":{"default":false,"type":"boolean","description":"(used for single image translation) If True, include the negatives from the other samples of the minibatch when computing the contrastive loss. Please see models/patchnce.py for more details."},"nce_layers":{"default":"0,4,8,12,16","type":"string","description":"compute NCE loss on which layers"},"nce_loss":{"default":"monce","type":"string","description":"CUT contrastice loss","enum":["patchnce","monce","SRC_hDCE"]},"netF":{"default":"mlp_sample","type":"string","description":"how to downsample the feature map","enum":["sample","mlp_sample","sample_qsattn","mlp_sample_qsattn"]},"netF_dropout":{"default":false,"type":"boolean","description":"whether to use dropout with F"},"netF_nc":{"default":256,"type":"integer","description":""},"netF_norm":{"default":"instance","type":"string","description":"instance normalization or batch normalization for F","enum":["instance","batch","none"]},"num_patches":{"default":256,"type":"integer","description":"number of patches per layer"}}},"cyclegan":{"title":"CycleGAN model","type":"object","properties":{"lambda_A":{"default":10.0,"type":"number","description":"weight for cycle loss (A -\\> B -\\> A)"},"lambda_B":{"default":10.0,"type":"number","description":"weight for cycle loss (B -\\> A -\\> B)"},"lambda_identity":{"default":0.5,"type":"number","description":"use identity mapping. 
Setting lambda_identity other than 0 has an effect of scaling the weight of the identity mapping loss. For example, if the weight of the identity loss should be 10 times smaller than the weight of the reconstruction loss, please set lambda_identity = 0.1"},"rec_noise":{"default":0.0,"type":"number","description":"whether to add noise to reconstruction"}}},"re":{"title":"ReCUT / ReCycleGAN","type":"object","properties":{"P_lr":{"default":0.0002,"type":"number","description":"initial learning rate for P networks"},"adversarial_loss_p":{"default":false,"type":"boolean","description":"if True, also train the prediction model with an adversarial loss"},"netP":{"default":"unet_128","type":"string","description":"specify P architecture","enum":["resnet_9blocks","resnet_6blocks","resnet_attn","unet_256","unet_128"]},"no_train_P_fake_images":{"default":false,"type":"boolean","description":"if True, P wont be trained over fake images projections"},"nuplet_size":{"default":3,"type":"integer","description":"Number of frames loaded"},"projection_threshold":{"default":1.0,"type":"number","description":"threshold of the real images projection loss below with fake projection and fake reconstruction losses are applied"}}},"palette":{"title":"Diffusion model","type":"object","properties":{"computed_sketch_list":{"default":["canny","hed"],"type":"array","items":{"enum":null,"type":"string"},"description":"what primitives to use for random sketch"},"cond_embed_dim":{"default":32,"type":"integer","description":"nb of examples processed for inference"},"cond_image_creation":{"default":"y_t","type":"string","description":"how image conditioning is created: either from y_t (no conditioning), previous frame, from computed sketch (e.g. canny), from low res image or from reference image (i.e. 
image that is not aligned with the ground truth)","enum":["y_t","previous_frame","computed_sketch","low_res","ref"]},"conditioning":{"default":"","type":"string","description":"whether to use conditioning or not","enum":["","mask","class","mask_and_class","ref"]},"ddim_eta":{"default":0.5,"type":"number","description":"eta for ddim sampling variance"},"ddim_num_steps":{"default":10,"type":"integer","description":"number of steps for ddim sampling"},"dropout_prob":{"default":0.0,"type":"number","description":"dropout probability for classifier-free guidance"},"generate_per_class":{"default":false,"type":"boolean","description":"whether to generate samples of each images"},"inference_num":{"default":-1,"type":"integer","description":"nb of examples processed for inference"},"lambda_G":{"default":1.0,"type":"number","description":"weight for supervised loss"},"loss":{"default":"MSE","type":"string","description":"loss type of the denoising model","enum":["L1","MSE","multiscale"]},"prob_use_previous_frame":{"default":0.5,"type":"number","description":"prob to use previous frame as y cond"},"ref_embed_net":{"default":"clip","type":"string","description":"embedding network to use for ref conditioning","enum":["clip","imagebind"]},"sam_crop_delta":{"default":true,"type":"boolean","description":"extend crop's width and height by 2\\*crop_delta before computing masks"},"sam_final_canny":{"default":false,"type":"boolean","description":"whether to perform a Canny edge detection on sam sketch to soften the edges"},"sam_max_mask_area":{"default":0.99,"type":"number","description":"maximum area in proportion of image size for a mask to be kept"},"sam_min_mask_area":{"default":0.001,"type":"number","description":"minimum area in proportion of image size for a mask to be kept"},"sam_no_output_binary_sam":{"default":false,"type":"boolean","description":"whether to not output binary sketch before 
Canny"},"sam_no_sample_points_in_ellipse":{"default":false,"type":"boolean","description":"whether to not sample the points inside an ellipse to avoid the corners of the image"},"sam_no_sobel_filter":{"default":false,"type":"boolean","description":"whether to not use a Sobel filter on each SAM masks"},"sam_points_per_side":{"default":16,"type":"integer","description":"number of points per side of image to prompt SAM with (\\# of prompted points will be points_per_side\\*\\*2)"},"sam_redundancy_threshold":{"default":0.62,"type":"number","description":"redundancy threshold above which redundant masks are not kept"},"sam_sobel_threshold":{"default":0.7,"type":"number","description":"sobel threshold in % of gradient magintude"},"sam_use_gaussian_filter":{"default":false,"type":"boolean","description":"whether to apply a Gaussian blur to each SAM masks"},"sampling_method":{"default":"ddpm","type":"string","description":"choose the sampling method between ddpm and ddim","enum":["ddpm","ddim"]},"sketch_canny_range":{"default":[0,765],"type":"array","items":{"enum":null,"type":"string"},"description":"range of randomized canny sketch thresholds"},"super_resolution_scale":{"default":2.0,"type":"number","description":"scale for super resolution"},"task":{"default":"inpainting","type":"string","description":"Whether to perform inpainting, super resolution or pix2pix","enum":["inpainting","super_resolution","pix2pix"]}}}}},"data":{"title":"Datasets","type":"object","properties":{"online_creation":{"title":"Online created datasets","type":"object","properties":{"color_mask_A":{"default":false,"type":"boolean","description":"Perform task of replacing color-filled masks by objects"},"crop_delta_A":{"default":50,"type":"integer","description":"size of crops are random, values allowed are online_creation_crop_size more or less online_creation_crop_delta for domain A"},"crop_delta_B":{"default":50,"type":"integer","description":"size of crops are random, values allowed are 
online_creation_crop_size more or less online_creation_crop_delta for domain B"},"crop_size_A":{"default":512,"type":"integer","description":"crop to this size during online creation, it needs to be greater than bbox size for domain A"},"crop_size_B":{"default":512,"type":"integer","description":"crop to this size during online creation, it needs to be greater than bbox size for domain B"},"load_size_A":{"default":[],"type":"array","items":{"enum":null,"type":"string"},"description":"load to this size during online creation, format : width height or only one size if square"},"load_size_B":{"default":[],"type":"array","items":{"enum":null,"type":"string"},"description":"load to this size during online creation, format : width height or only one size if square"},"mask_delta_A":{"default":[[]],"type":"array","items":{"enum":null,"type":"string"},"description":"mask offset (in pixels) to allow generation of a bigger object in domain B (for semantic loss) for domain A, format : 'width (x),height (y)' for each class or only one size if square, e.g. '125, 55 100, 100' for 2 classes"},"mask_delta_A_ratio":{"default":[[]],"type":"array","items":{"enum":null,"type":"string"},"description":"ratio mask offset to allow generation of a bigger object in domain B (for semantic loss) for domain A, format : width (x),height (y) for each class or only one size if square"},"mask_delta_B":{"default":[[]],"type":"array","items":{"enum":null,"type":"string"},"description":"mask offset (in pixels) to allow generation of a bigger object in domain A (for semantic loss) for domain B, format : 'width (x),height (y)' for each class or only one size if square, e.g. 
'125, 55 100, 100' for 2 classes"},"mask_delta_B_ratio":{"default":[[]],"type":"array","items":{"enum":null,"type":"string"},"description":"ratio mask offset to allow generation of a bigger object in domain A (for semantic loss) for domain B, format : 'width (x),height (y)' for each class or only one size if square"},"mask_random_offset_A":{"default":[0.0],"type":"array","items":{"enum":null,"type":"string"},"description":"ratio mask size randomization (only to make bigger one) to robustify the image generation in domain A, format : width (x) height (y) or only one size if square"},"mask_random_offset_B":{"default":[0.0],"type":"array","items":{"enum":null,"type":"string"},"description":"mask size randomization (only to make bigger one) to robustify the image generation in domain B, format : width (y) height (x) or only one size if square"},"mask_square_A":{"default":false,"type":"boolean","description":"whether masks should be squared for domain A"},"mask_square_B":{"default":false,"type":"boolean","description":"whether masks should be squared for domain B"},"rand_mask_A":{"default":false,"type":"boolean","description":"Perform task of replacing noised masks by objects"}}},"crop_size":{"default":256,"type":"integer","description":"then crop to this size"},"dataset_mode":{"default":"unaligned","type":"string","description":"chooses how datasets are 
loaded.","enum":["unaligned","unaligned_labeled_cls","unaligned_labeled_mask","self_supervised_labeled_mask","unaligned_labeled_mask_cls","self_supervised_labeled_mask_cls","unaligned_labeled_mask_online","self_supervised_labeled_mask_online","unaligned_labeled_mask_cls_online","self_supervised_labeled_mask_cls_online","aligned","nuplet_unaligned_labeled_mask","temporal_labeled_mask_online","self_supervised_temporal","single","unaligned_labeled_mask_ref","self_supervised_labeled_mask_ref","unaligned_labeled_mask_online_ref","self_supervised_labeled_mask_online_ref"]},"direction":{"default":"AtoB","type":"string","description":"AtoB or BtoA","enum":["AtoB","BtoA"]},"inverted_mask":{"default":false,"type":"boolean","description":"whether to invert the mask, i.e. around the bbox"},"load_size":{"default":286,"type":"integer","description":"scale images to this size"},"max_dataset_size":{"default":1000000000,"type":"integer","description":"Maximum number of samples allowed per dataset. If the dataset directory contains more than max_dataset_size, only a subset is loaded."},"num_threads":{"default":4,"type":"integer","description":"\\# threads for loading data"},"online_context_pixels":{"default":0,"type":"integer","description":"context pixel band around the crop, unused for generation, only for disc "},"online_fixed_mask_size":{"default":-1,"type":"integer","description":"if \\>0, it will be used as fixed bbox size (warning: in dataset resolution ie before resizing) "},"online_select_category":{"default":-1,"type":"integer","description":"category to select for bounding boxes, -1 means all boxes selected"},"online_single_bbox":{"default":false,"type":"boolean","description":"whether to only allow a single bbox per online crop"},"preprocess":{"default":"resize_and_crop","type":"string","description":"scaling and cropping of images at load 
time","enum":["resize_and_crop","crop","scale_width","scale_width_and_crop","none"]},"refined_mask":{"default":false,"type":"boolean","description":"whether to use refined mask with sam"},"relative_paths":{"default":false,"type":"boolean","description":"whether paths to images are relative to dataroot"},"sanitize_paths":{"default":false,"type":"boolean","description":"if true, wrong images or labels paths will be removed before training"},"serial_batches":{"default":false,"type":"boolean","description":"if true, takes images in order to make batches, otherwise takes them randomly"},"temporal_frame_step":{"default":30,"type":"integer","description":"how many frames between successive frames selected"},"temporal_num_common_char":{"default":-1,"type":"integer","description":"how many characters (the first ones) are used to identify a video; if =-1 natural sorting is used "},"temporal_number_frames":{"default":5,"type":"integer","description":"how many successive frames use for temporal loader"}}},"f_s":{"title":"Semantic segmentation network","type":"object","properties":{"all_classes_as_one":{"default":false,"type":"boolean","description":"if true, all classes will be considered as the same one (ie foreground vs background)"},"class_weights":{"default":[],"type":"array","items":{"enum":null,"type":"string"},"description":"class weights for imbalanced semantic classes"},"config_segformer":{"default":"models/configs/segformer/segformer_config_b0.json","type":"string","description":"path to segformer configuration file for f_s"},"dropout":{"default":false,"type":"boolean","description":"dropout for the semantic network"},"net":{"default":"vgg","type":"string","description":"specify f_s network [vgg|unet|segformer|sam]","enum":["vgg","unet","segformer","sam"]},"nf":{"default":64,"type":"integer","description":"\\# of filters in the first conv layer of classifier"},"semantic_nclasses":{"default":2,"type":"integer","description":"number of classes of the semantic loss 
classifier"},"semantic_threshold":{"default":1.0,"type":"number","description":"threshold of the semantic classifier loss below with semantic loss is applied"},"weight_sam":{"default":"","type":"string","description":"path to sam weight for f_s, e.g. models/configs/sam/pretrain/sam_vit_b_01ec64.pth, or models/configs/sam/pretrain/mobile_sam.pt for MobileSAM"},"weight_segformer":{"default":"","type":"string","description":"path to segformer weight for f_s, e.g. models/configs/segformer/pretrain/segformer_mit-b0.pth"}}},"cls":{"title":"Semantic classification network","type":"object","properties":{"all_classes_as_one":{"default":false,"type":"boolean","description":"if true, all classes will be considered as the same one (ie foreground vs background)"},"class_weights":{"default":[],"type":"array","items":{"enum":null,"type":"string"},"description":"class weights for imbalanced semantic classes"},"config_segformer":{"default":"models/configs/segformer/segformer_config_b0.json","type":"string","description":"path to segformer configuration file for cls"},"dropout":{"default":false,"type":"boolean","description":"dropout for the semantic network"},"net":{"default":"vgg","type":"string","description":"specify cls network [vgg|unet|segformer]","enum":["vgg","unet","segformer"]},"nf":{"default":64,"type":"integer","description":"\\# of filters in the first conv layer of classifier"},"semantic_nclasses":{"default":2,"type":"integer","description":"number of classes of the semantic loss classifier"},"semantic_threshold":{"default":1.0,"type":"number","description":"threshold of the semantic classifier loss below with semantic loss is applied"},"weight_segformer":{"default":"","type":"string","description":"path to segformer weight for cls, e.g. 
models/configs/segformer/pretrain/segformer_mit-b0.pth"}}},"output":{"title":"Output","type":"object","properties":{"display":{"title":"Visdom display","type":"object","properties":{"G_attention_masks":{"default":false,"type":"boolean","description":""},"aim_port":{"default":53800,"type":"integer","description":"aim port of the web display"},"aim_server":{"default":"http://localhost","type":"string","description":"aim server of the web display"},"diff_fake_real":{"default":false,"type":"boolean","description":"if True x - G(x) is displayed"},"env":{"default":"","type":"string","description":"visdom display environment name (default is \"main\")"},"freq":{"default":400,"type":"integer","description":"frequency of showing training results on screen"},"id":{"default":1,"type":"integer","description":"window id of the web display"},"ncols":{"default":0,"type":"integer","description":"if positive, display all images in a single visdom web panel with certain number of images per row.(if == 0 ncols will be computed automatically)"},"networks":{"default":false,"type":"boolean","description":"Set True if you want to display networks on port 8000"},"type":{"default":["visdom"],"type":"array","items":{"enum":null,"type":"string"},"description":"output display, either visdom, aim or no output","enum":["visdom","aim","none"]},"visdom_autostart":{"default":false,"type":"boolean","description":"whether to start a visdom server automatically"},"visdom_port":{"default":8097,"type":"integer","description":"visdom port of the web display"},"visdom_server":{"default":"http://localhost","type":"string","description":"visdom server of the web display"},"winsize":{"default":256,"type":"integer","description":"display window size for both visdom and HTML"}}},"no_html":{"default":false,"type":"boolean","description":"do not save intermediate training results to [opt.checkpoints_dir]/[opt.name]/web/"},"print_freq":{"default":100,"type":"integer","description":"frequency of showing training 
results on console"},"update_html_freq":{"default":1000,"type":"integer","description":"frequency of saving training results to html"},"verbose":{"default":false,"type":"boolean","description":"if specified, print more debugging information"}}},"model":{"title":"Model","type":"object","properties":{"depth_network":{"default":"DPT_Large","type":"string","description":"specify depth prediction network architecture","enum":["DPT_Large","DPT_Hybrid","MiDaS_small","DPT_BEiT_L_512","DPT_BEiT_L_384","DPT_BEiT_B_384","DPT_SwinV2_L_384","DPT_SwinV2_B_384","DPT_SwinV2_T_256","DPT_Swin_L_384","DPT_Next_ViT_L_384","DPT_LeViT_224"]},"init_gain":{"default":0.02,"type":"number","description":"scaling factor for normal, xavier and orthogonal."},"init_type":{"default":"normal","type":"string","description":"network initialization","enum":["normal","xavier","kaiming","orthogonal"]},"input_nc":{"default":3,"type":"integer","description":"\\# of input image channels: 3 for RGB and 1 for grayscale","enum":[1,3]},"multimodal":{"default":false,"type":"boolean","description":"multimodal model with random latent input vector"},"output_nc":{"default":3,"type":"integer","description":"\\# of output image channels: 3 for RGB and 1 for grayscale","enum":[1,3]},"prior_321_backwardcompatibility":{"default":false,"type":"boolean","description":"whether to load models from previous version of JG."},"type_sam":{"default":"mobile_sam","type":"string","description":"which model to use for segment-anything mask generation","enum":["sam","mobile_sam"]}}},"train":{"title":"Training","type":"object","properties":{"sem":{"title":"Semantic training","type":"object","properties":{"cls_B":{"default":false,"type":"boolean","description":"if true cls will be trained not only on domain A but also on domain B"},"cls_lambda":{"default":1.0,"type":"number","description":"weight for semantic class loss"},"cls_pretrained":{"default":false,"type":"boolean","description":"whether to use a pretrained model, available 
for non \"basic\" model only"},"cls_template":{"default":"basic","type":"string","description":"classifier/regressor model type, from torchvision (resnet18, ...), default is custom simple model"},"idt":{"default":false,"type":"boolean","description":"if true apply semantic loss on identity"},"lr_cls":{"default":0.0002,"type":"number","description":"cls learning rate"},"lr_f_s":{"default":0.0002,"type":"number","description":"f_s learning rate"},"mask_lambda":{"default":1.0,"type":"number","description":"weight for semantic mask loss"},"net_output":{"default":false,"type":"boolean","description":"if true apply generator semantic loss on network output for real image rather than on label."},"use_label_B":{"default":false,"type":"boolean","description":"if true domain B has labels too"}}},"mask":{"title":"Semantic training with masks","type":"object","properties":{"charbonnier_eps":{"default":1e-06,"type":"number","description":"Charbonnier loss epsilon value"},"compute_miou":{"default":false,"type":"boolean","description":"whether to compute mIoU on semantic masks prediction"},"disjoint_f_s":{"default":false,"type":"boolean","description":"whether to use a disjoint f_s with the same exact structure"},"f_s_B":{"default":false,"type":"boolean","description":"if true f_s will be trained not only on domain A but also on domain B"},"for_removal":{"default":false,"type":"boolean","description":"if true, object removal mode, domain B images with label 0, cut models only"},"lambda_out_mask":{"default":10.0,"type":"number","description":"weight for loss out mask"},"loss_out_mask":{"default":"L1","type":"string","description":"loss for out mask content (which should not change).","enum":["L1","MSE","Charbonnier"]},"miou_every":{"default":1000,"type":"integer","description":"compute mIoU every n iterations"},"no_train_f_s_A":{"default":false,"type":"boolean","description":"if true f_s wont be trained on domain A"},"out_mask":{"default":false,"type":"boolean","description":"use 
loss out mask"}}},"D_accuracy_every":{"default":1000,"type":"integer","description":"compute D accuracy every N iterations"},"D_lr":{"default":0.0001,"type":"number","description":"discriminator separate learning rate"},"G_ema":{"default":false,"type":"boolean","description":"whether to build G via exponential moving average"},"G_ema_beta":{"default":0.999,"type":"number","description":"exponential decay for ema"},"G_lr":{"default":0.0002,"type":"number","description":"initial learning rate for generator"},"batch_size":{"default":1,"type":"integer","description":"input batch size"},"beta1":{"default":0.9,"type":"number","description":"momentum term of adam"},"beta2":{"default":0.999,"type":"number","description":"momentum term of adam"},"cls_l1_regression":{"default":false,"type":"boolean","description":"if true l1 loss will be used to compute regressor loss"},"cls_regression":{"default":false,"type":"boolean","description":"if true cls will be a regressor and not a classifier"},"compute_D_accuracy":{"default":false,"type":"boolean","description":"whether to compute D accuracy explicitely"},"compute_metrics_test":{"default":false,"type":"boolean","description":"whether to compute test metrics, e.g. FID, ..."},"continue":{"default":false,"type":"boolean","description":"continue training: load the latest model"},"epoch":{"default":"latest","type":"string","description":"which epoch to load? set to latest to use latest cached model"},"epoch_count":{"default":1,"type":"integer","description":"the starting epoch count, we save the model by \\, \\+\\, ..."},"export_jit":{"default":false,"type":"boolean","description":"whether to export model in jit format"},"feat_wavelet":{"default":false,"type":"boolean","description":"if true, train in wavelet features space (Note: this may not include all discriminators, when training GANs)"},"gan_mode":{"default":"lsgan","type":"string","description":"the type of GAN objective. 
vanilla GAN loss is the cross-entropy objective used in the original GAN paper.","enum":["vanilla","lsgan","wgangp","projected"]},"iter_size":{"default":1,"type":"integer","description":"backward will be apllied each iter_size iterations, it simulate a greater batch size : its value is batch_size\\*iter_size"},"load_iter":{"default":0,"type":"integer","description":"which iteration to load? if load_iter \\> 0, the code will load models by iter_[load_iter]; otherwise, the code will load models by [epoch]"},"lr_decay_iters":{"default":50,"type":"integer","description":"multiply by a gamma every lr_decay_iters iterations"},"lr_policy":{"default":"linear","type":"string","description":"learning rate policy.","enum":["linear","step","plateau","cosine"]},"metrics_every":{"default":1000,"type":"integer","description":"compute metrics every N iterations"},"metrics_list":{"default":["FID"],"type":"array","items":{"enum":null,"type":"string"},"description":"metrics on results quality to compute","enum":["FID","KID","MSID","PSNR","LPIPS"]},"mm_lambda_z":{"default":0.5,"type":"number","description":"weight for random z loss"},"mm_nz":{"default":8,"type":"integer","description":"number of latent vectors"},"n_epochs":{"default":100,"type":"integer","description":"number of epochs with the initial learning rate"},"n_epochs_decay":{"default":100,"type":"integer","description":"number of epochs to linearly decay learning rate to zero"},"nb_img_max_fid":{"default":1000000000,"type":"integer","description":"Maximum number of samples allowed per dataset to compute fid. 
If the dataset directory contains more than nb_img_max_fid, only a subset is used."},"optim":{"default":"adam","type":"string","description":"optimizer (adam, radam, adamw, ...)","enum":["adam","radam","adamw","lion"]},"optim_eps":{"default":1e-08,"type":"number","description":"epsilon for optimizer"},"optim_weight_decay":{"default":0.0,"type":"number","description":"weight decay for optimizer"},"pool_size":{"default":50,"type":"integer","description":"the size of image buffer that stores previously generated images"},"save_by_iter":{"default":false,"type":"boolean","description":"whether saves model by iteration"},"save_epoch_freq":{"default":1,"type":"integer","description":"frequency of saving checkpoints at the end of epochs"},"save_latest_freq":{"default":5000,"type":"integer","description":"frequency of saving the latest results"},"semantic_cls":{"default":false,"type":"boolean","description":"if true semantic class losses will be used"},"semantic_mask":{"default":false,"type":"boolean","description":"if true semantic mask losses will be used"},"temporal_criterion":{"default":false,"type":"boolean","description":"if true, MSE loss will be computed between successive frames"},"temporal_criterion_lambda":{"default":1.0,"type":"number","description":"lambda for MSE loss that will be computed between successive frames"},"use_contrastive_loss_D":{"default":false,"type":"boolean","description":""}}},"dataaug":{"title":"Data augmentation","type":"object","properties":{"APA":{"default":false,"type":"boolean","description":"if true, G will be used as augmentation during D training adaptively to D overfitting between real and fake images"},"APA_every":{"default":4,"type":"integer","description":"How often to perform APA adjustment?"},"APA_nimg":{"default":50,"type":"integer","description":"APA adjustment speed, measured in how many images it takes for p to increase/decrease by one unit."},"APA_p":{"default":0,"type":"integer","description":"initial value of probability 
APA"},"APA_target":{"default":0.6,"type":"number","description":""},"D_diffusion":{"default":false,"type":"boolean","description":"whether to apply diffusion noise augmentation to discriminator inputs, projected discriminator only"},"D_diffusion_every":{"default":4,"type":"integer","description":"How often to perform diffusion augmentation adjustment"},"D_label_smooth":{"default":false,"type":"boolean","description":"whether to use one-sided label smoothing with discriminator"},"D_noise":{"default":0.0,"type":"number","description":"whether to add instance noise to discriminator inputs"},"affine":{"default":0.0,"type":"number","description":"if specified, apply random affine transforms to the images for data augmentation"},"affine_scale_max":{"default":1.2,"type":"number","description":"if random affine specified, max scale range value"},"affine_scale_min":{"default":0.8,"type":"number","description":"if random affine specified, min scale range value"},"affine_shear":{"default":45,"type":"integer","description":"if random affine specified, shear range (0,value)"},"affine_translate":{"default":0.2,"type":"number","description":"if random affine specified, translation range (-value\\*img_size,+value\\*img_size) value"},"diff_aug_policy":{"default":"","type":"string","description":"choose the augmentation policy : color randaffine randperspective. If you want more than one, please write them separated by a comma with no space (e.g. 
color,randaffine)"},"diff_aug_proba":{"default":0.5,"type":"number","description":"proba of using each transformation"},"imgaug":{"default":false,"type":"boolean","description":"whether to apply random image augmentation"},"no_flip":{"default":false,"type":"boolean","description":"if specified, do not flip the images for data augmentation"},"no_rotate":{"default":false,"type":"boolean","description":"if specified, do not rotate the images for data augmentation"}}},"checkpoints_dir":{"default":"./checkpoints","type":"string","description":"models are saved here"},"dataroot":{"default":"None","type":"string","description":"path to images (should have subfolders trainA, trainB, valA, valB, etc)"},"ddp_port":{"default":"12355","type":"string","description":""},"gpu_ids":{"default":"0","type":"string","description":"gpu ids: e.g. 0 0,1,2, 0,2. use -1 for CPU"},"model_type":{"default":"cut","type":"string","description":"chooses which model to use.","enum":["cut","cycle_gan","palette"]},"name":{"default":"experiment_name","type":"string","description":"name of the experiment. 
It decides where to store samples and models"},"phase":{"default":"train","type":"string","description":"train, val, test, etc"},"suffix":{"default":"","type":"string","description":"customized suffix: opt.name = opt.name + suffix: e.g., {model}_{netG}_size{load_size}"},"test_batch_size":{"default":1,"type":"integer","description":"input batch size"},"warning_mode":{"default":false,"type":"boolean","description":"whether to display warning"},"with_amp":{"default":false,"type":"boolean","description":"whether to activate torch amp on forward passes"},"with_tf32":{"default":false,"type":"boolean","description":"whether to activate tf32 for faster computations (Ampere GPU and beyond only)"},"with_torch_compile":{"default":false,"type":"boolean","description":"whether to activate torch.compile for some forward and backward functions (experimental)"}}}},"definitions":{"ServerTrainOptions":{"title":"ServerTrainOptions","type":"object","properties":{"sync":{"title":"Sync","description":"if false, the call returns immediately and train process is executed in the background. If true, the call returns only when training process is finished","default":false,"type":"boolean"}}}}}}},"definitions":{"ServerTrainOptions":{"title":"ServerTrainOptions","type":"object","properties":{"sync":{"title":"Sync","description":"if false, the call returns immediately and train process is executed in the background. 
If true, the call returns only when training process is finished","default":false,"type":"boolean"}}}}} \ No newline at end of file +{"openapi":"3.1.0","info":{"title":"JoliGEN server","description":"*commit:* [ce5e4ac1](https://github.com/jolibrain/joliGEN/commit/ce5e4ac14688cac3f60e690898f0d87f091559c1)\n\nThis is the JoliGEN server API documentation.\n","version":"0.1.0"},"paths":{"/train/{name}":{"get":{"summary":"Get the status of a training process","operationId":"get_train_train__name__get","parameters":[{"required":true,"schema":{"type":"string","title":"Name"},"name":"name","in":"path"}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"post":{"summary":"Start a training process with given name.","description":"The training process will be created using the same options as command line","operationId":"train_train__name__post","parameters":[{"required":true,"schema":{"type":"string","title":"Name"},"name":"name","in":"path"}],"requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/TrainOptions"}}}},"responses":{"201":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"delete":{"summary":"Delete a training process.","description":"If the process is running, it will be stopped.","operationId":"delete_train_train__name__delete","parameters":[{"required":true,"schema":{"type":"string","title":"Name"},"name":"name","in":"path"}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation 
Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/train":{"get":{"summary":"Get the status of all training processes","operationId":"get_train_processes_train_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}}}}},"/info":{"get":{"summary":"Get the server status","operationId":"get_info_info_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}}}}},"/fs/":{"delete":{"summary":"Delete a file or a directory in the filesystem","description":"This endpoint can be dangerous, use it with extreme caution","operationId":"delete_path_fs__delete","parameters":[{"required":true,"schema":{"type":"string","title":"Path"},"name":"path","in":"query"}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}}},"components":{"schemas":{"HTTPValidationError":{"properties":{"detail":{"items":{"$ref":"#/components/schemas/ValidationError"},"type":"array","title":"Detail"}},"type":"object","title":"HTTPValidationError"},"ValidationError":{"properties":{"loc":{"items":{"anyOf":[{"type":"string"},{"type":"integer"}]},"type":"array","title":"Location"},"msg":{"type":"string","title":"Message"},"type":{"type":"string","title":"Error Type"}},"type":"object","required":["loc","msg","type"],"title":"ValidationError"},"TrainOptions":{"title":"TrainBody","type":"object","properties":{"server":{"title":"Server","default":{"sync":false},"allOf":[{"$ref":"#/definitions/ServerTrainOptions"}]},"train_options":{"title":"TrainOptions","type":"object","properties":{"D":{"title":"Discriminator","type":"object","properties":{"dropout":{"default":false,"type":"boolean","description":"whether to use dropout in the 
discriminator"},"n_layers":{"default":3,"type":"integer","description":"only used if netD==n_layers"},"ndf":{"default":64,"type":"integer","description":"\\# of discrim filters in the first conv layer"},"netDs":{"default":["projected_d","basic"],"type":"array","items":{"enum":null,"type":"string"},"description":"specify discriminator architecture, another option, --D_n_layers allows you to specify the layers in the n_layers discriminator. NB: duplicated arguments are ignored. Values: basic, n_layers, pixel, projected_d, temporal, vision_aided, depth, mask, sam"},"no_antialias":{"default":false,"type":"boolean","description":"if specified, use stride=2 convs instead of antialiased-downsampling (sad)"},"no_antialias_up":{"default":false,"type":"boolean","description":"if specified, use [upconv(learned filter)] instead of [upconv(hard-coded [1,3,3,1] filter), conv]"},"norm":{"default":"instance","type":"string","description":"instance normalization or batch normalization for D","enum":["instance","batch","none"]},"proj_config_segformer":{"default":"models/configs/segformer/segformer_config_b0.json","type":"string","description":"path to segformer configuration file"},"proj_interp":{"default":-1,"type":"integer","description":"whether to force projected discriminator interpolation to a value \\> 224, -1 means no interpolation"},"proj_network_type":{"default":"efficientnet","type":"string","description":"projected discriminator architecture","enum":["efficientnet","segformer","vitbase","vitsmall","vitsmall2","vitclip16","vitclip14","depth","dinov2_vits14","dinov2_vitb14","dinov2_vitl14","dinov2_vitg14","dinov2_vits14_reg","dinov2_vitb14_reg","dinov2_vitl14_reg","dinov2_vitg14_reg"]},"proj_weight_segformer":{"default":"models/configs/segformer/pretrain/segformer_mit-b0.pth","type":"string","description":"path to segformer weight"},"spectral":{"default":false,"type":"boolean","description":"whether to use spectral norm in the 
discriminator"},"temporal_every":{"default":4,"type":"integer","description":"apply temporal discriminator every x steps"},"vision_aided_backbones":{"default":"clip+dino+swin","type":"string","description":"specify vision aided discriminators architectures, they are frozen then output are combined and fitted with a linear network on top, choose from dino, clip, swin, det_coco, seg_ade and combine them with +"},"weight_sam":{"default":"","type":"string","description":"path to sam weight for D, e.g. models/configs/sam/pretrain/sam_vit_b_01ec64.pth, or models/configs/sam/pretrain/mobile_sam.pt for MobileSAM"}}},"G":{"title":"Generator","type":"object","properties":{"attn_nb_mask_attn":{"default":10,"type":"integer","description":"number of attention masks in _attn model architectures"},"attn_nb_mask_input":{"default":1,"type":"integer","description":"number of mask dedicated to input in _attn model architectures"},"backward_compatibility_twice_resnet_blocks":{"default":false,"type":"boolean","description":"if true, feats will go througt resnet blocks two times for resnet_attn generators. 
This option will be deleted, it's for backward compatibility (old models were trained that way)."},"config_segformer":{"default":"models/configs/segformer/segformer_config_b0.json","type":"string","description":"path to segformer configuration file for G"},"diff_n_timestep_test":{"default":1000,"type":"integer","description":"Number of timesteps used for UNET mha inference (test time)."},"diff_n_timestep_train":{"default":2000,"type":"integer","description":"Number of timesteps used for UNET mha training."},"dropout":{"default":false,"type":"boolean","description":"dropout for the generator"},"nblocks":{"default":9,"type":"integer","description":"\\# of layer blocks in G, applicable to resnets"},"netE":{"default":"resnet_256","type":"string","description":"specify multimodal latent vector encoder","enum":["resnet_128","resnet_256","resnet_512","conv_128","conv_256","conv_512"]},"netG":{"default":"mobile_resnet_attn","type":"string","description":"specify generator architecture","enum":["resnet","resnet_attn","mobile_resnet","mobile_resnet_attn","unet_256","unet_128","segformer_attn_conv","segformer_conv","ittr","unet_mha","uvit","unet_mha_ref_attn"]},"ngf":{"default":64,"type":"integer","description":"\\# of gen filters in the last conv layer"},"norm":{"default":"instance","type":"string","description":"instance normalization or batch normalization for G","enum":["instance","batch","none"]},"padding_type":{"default":"reflect","type":"string","description":"whether to use padding in the generator","enum":["reflect","replicate","zeros"]},"spectral":{"default":false,"type":"boolean","description":"whether to use spectral norm in the generator"},"unet_mha_attn_res":{"default":[16],"type":"array","items":{"enum":null,"type":"string"},"description":"downrate samples at which attention takes place"},"unet_mha_channel_mults":{"default":[1,2,4,8],"type":"array","items":{"enum":null,"type":"string"},"description":"channel multiplier for each level of the UNET 
mha"},"unet_mha_group_norm_size":{"default":32,"type":"integer","description":""},"unet_mha_norm_layer":{"default":"groupnorm","type":"string","description":"","enum":["groupnorm","batchnorm","layernorm","instancenorm","switchablenorm"]},"unet_mha_num_head_channels":{"default":32,"type":"integer","description":"number of channels in each head of the mha architecture"},"unet_mha_num_heads":{"default":1,"type":"integer","description":"number of heads in the mha architecture"},"unet_mha_res_blocks":{"default":[2,2,2,2],"type":"array","items":{"enum":null,"type":"string"},"description":"distribution of resnet blocks across the UNet stages, should have same size as --G_unet_mha_channel_mults"},"unet_mha_vit_efficient":{"default":false,"type":"boolean","description":"if true, use efficient attention in UNet and UViT"},"uvit_num_transformer_blocks":{"default":6,"type":"integer","description":"Number of transformer blocks in UViT"}}},"alg":{"title":"Algorithm-specific","type":"object","properties":{"gan":{"title":"GAN model","type":"object","properties":{"lambda":{"default":1.0,"type":"number","description":"weight for GAN loss:GAN(G(X))"}}},"cut":{"title":"CUT model","type":"object","properties":{"HDCE_gamma":{"default":1.0,"type":"number","description":""},"HDCE_gamma_min":{"default":1.0,"type":"number","description":""},"MSE_idt":{"default":false,"type":"boolean","description":"use MSENCE loss for identity mapping: MSE(G(Y), Y))"},"flip_equivariance":{"default":false,"type":"boolean","description":"Enforce flip-equivariance as additional regularization. 
It's used by FastCUT, but not CUT"},"lambda_MSE_idt":{"default":1.0,"type":"number","description":"weight for MSE identity loss: MSE(G(X), X)"},"lambda_NCE":{"default":1.0,"type":"number","description":"weight for NCE loss: NCE(G(X), X)"},"lambda_SRC":{"default":0.0,"type":"number","description":"weight for SRC (semantic relation consistency) loss: NCE(G(X), X)"},"nce_T":{"default":0.07,"type":"number","description":"temperature for NCE loss"},"nce_idt":{"default":true,"type":"boolean","description":"use NCE loss for identity mapping: NCE(G(Y), Y))"},"nce_includes_all_negatives_from_minibatch":{"default":false,"type":"boolean","description":"(used for single image translation) If True, include the negatives from the other samples of the minibatch when computing the contrastive loss. Please see models/patchnce.py for more details."},"nce_layers":{"default":"0,4,8,12,16","type":"string","description":"compute NCE loss on which layers"},"nce_loss":{"default":"monce","type":"string","description":"CUT contrastice loss","enum":["patchnce","monce","SRC_hDCE"]},"netF":{"default":"mlp_sample","type":"string","description":"how to downsample the feature map","enum":["sample","mlp_sample","sample_qsattn","mlp_sample_qsattn"]},"netF_dropout":{"default":false,"type":"boolean","description":"whether to use dropout with F"},"netF_nc":{"default":256,"type":"integer","description":""},"netF_norm":{"default":"instance","type":"string","description":"instance normalization or batch normalization for F","enum":["instance","batch","none"]},"num_patches":{"default":256,"type":"integer","description":"number of patches per layer"}}},"cyclegan":{"title":"CycleGAN model","type":"object","properties":{"lambda_A":{"default":10.0,"type":"number","description":"weight for cycle loss (A -\\> B -\\> A)"},"lambda_B":{"default":10.0,"type":"number","description":"weight for cycle loss (B -\\> A -\\> B)"},"lambda_identity":{"default":0.5,"type":"number","description":"use identity mapping. 
Setting lambda_identity other than 0 has an effect of scaling the weight of the identity mapping loss. For example, if the weight of the identity loss should be 10 times smaller than the weight of the reconstruction loss, please set lambda_identity = 0.1"},"rec_noise":{"default":0.0,"type":"number","description":"whether to add noise to reconstruction"}}},"re":{"title":"ReCUT / ReCycleGAN","type":"object","properties":{"P_lr":{"default":0.0002,"type":"number","description":"initial learning rate for P networks"},"adversarial_loss_p":{"default":false,"type":"boolean","description":"if True, also train the prediction model with an adversarial loss"},"netP":{"default":"unet_128","type":"string","description":"specify P architecture","enum":["resnet_9blocks","resnet_6blocks","resnet_attn","unet_256","unet_128"]},"no_train_P_fake_images":{"default":false,"type":"boolean","description":"if True, P wont be trained over fake images projections"},"nuplet_size":{"default":3,"type":"integer","description":"Number of frames loaded"},"projection_threshold":{"default":1.0,"type":"number","description":"threshold of the real images projection loss below with fake projection and fake reconstruction losses are applied"}}},"palette":{"title":"Diffusion model","type":"object","properties":{"computed_sketch_list":{"default":["canny","hed"],"type":"array","items":{"enum":null,"type":"string"},"description":"what primitives to use for random sketch"},"cond_embed_dim":{"default":32,"type":"integer","description":"nb of examples processed for inference"},"cond_image_creation":{"default":"y_t","type":"string","description":"how image conditioning is created: either from y_t (no conditioning), previous frame, from computed sketch (e.g. canny), from low res image or from reference image (i.e. 
image that is not aligned with the ground truth)","enum":["y_t","previous_frame","computed_sketch","low_res","ref"]},"conditioning":{"default":"","type":"string","description":"whether to use conditioning or not","enum":["","mask","class","mask_and_class","ref"]},"ddim_eta":{"default":0.5,"type":"number","description":"eta for ddim sampling variance"},"ddim_num_steps":{"default":10,"type":"integer","description":"number of steps for ddim sampling"},"dropout_prob":{"default":0.0,"type":"number","description":"dropout probability for classifier-free guidance"},"generate_per_class":{"default":false,"type":"boolean","description":"whether to generate samples of each images"},"inference_num":{"default":-1,"type":"integer","description":"nb of examples processed for inference"},"lambda_G":{"default":1.0,"type":"number","description":"weight for supervised loss"},"loss":{"default":"MSE","type":"string","description":"loss type of the denoising model","enum":["L1","MSE","multiscale"]},"prob_use_previous_frame":{"default":0.5,"type":"number","description":"prob to use previous frame as y cond"},"ref_embed_net":{"default":"clip","type":"string","description":"embedding network to use for ref conditioning","enum":["clip","imagebind"]},"sam_crop_delta":{"default":true,"type":"boolean","description":"extend crop's width and height by 2\\*crop_delta before computing masks"},"sam_final_canny":{"default":false,"type":"boolean","description":"whether to perform a Canny edge detection on sam sketch to soften the edges"},"sam_max_mask_area":{"default":0.99,"type":"number","description":"maximum area in proportion of image size for a mask to be kept"},"sam_min_mask_area":{"default":0.001,"type":"number","description":"minimum area in proportion of image size for a mask to be kept"},"sam_no_output_binary_sam":{"default":false,"type":"boolean","description":"whether to not output binary sketch before 
Canny"},"sam_no_sample_points_in_ellipse":{"default":false,"type":"boolean","description":"whether to not sample the points inside an ellipse to avoid the corners of the image"},"sam_no_sobel_filter":{"default":false,"type":"boolean","description":"whether to not use a Sobel filter on each SAM masks"},"sam_points_per_side":{"default":16,"type":"integer","description":"number of points per side of image to prompt SAM with (\\# of prompted points will be points_per_side\\*\\*2)"},"sam_redundancy_threshold":{"default":0.62,"type":"number","description":"redundancy threshold above which redundant masks are not kept"},"sam_sobel_threshold":{"default":0.7,"type":"number","description":"sobel threshold in % of gradient magintude"},"sam_use_gaussian_filter":{"default":false,"type":"boolean","description":"whether to apply a Gaussian blur to each SAM masks"},"sampling_method":{"default":"ddpm","type":"string","description":"choose the sampling method between ddpm and ddim","enum":["ddpm","ddim"]},"sketch_canny_range":{"default":[0,765],"type":"array","items":{"enum":null,"type":"string"},"description":"range of randomized canny sketch thresholds"},"super_resolution_scale":{"default":2.0,"type":"number","description":"scale for super resolution"},"task":{"default":"inpainting","type":"string","description":"Whether to perform inpainting, super resolution or pix2pix","enum":["inpainting","super_resolution","pix2pix"]}}}}},"data":{"title":"Datasets","type":"object","properties":{"online_creation":{"title":"Online created datasets","type":"object","properties":{"color_mask_A":{"default":false,"type":"boolean","description":"Perform task of replacing color-filled masks by objects"},"crop_delta_A":{"default":50,"type":"integer","description":"size of crops are random, values allowed are online_creation_crop_size more or less online_creation_crop_delta for domain A"},"crop_delta_B":{"default":50,"type":"integer","description":"size of crops are random, values allowed are 
online_creation_crop_size more or less online_creation_crop_delta for domain B"},"crop_size_A":{"default":512,"type":"integer","description":"crop to this size during online creation, it needs to be greater than bbox size for domain A"},"crop_size_B":{"default":512,"type":"integer","description":"crop to this size during online creation, it needs to be greater than bbox size for domain B"},"load_size_A":{"default":[],"type":"array","items":{"enum":null,"type":"string"},"description":"load to this size during online creation, format : width height or only one size if square"},"load_size_B":{"default":[],"type":"array","items":{"enum":null,"type":"string"},"description":"load to this size during online creation, format : width height or only one size if square"},"mask_delta_A":{"default":[[]],"type":"array","items":{"enum":null,"type":"string"},"description":"mask offset (in pixels) to allow generation of a bigger object in domain B (for semantic loss) for domain A, format : 'width (x),height (y)' for each class or only one size if square, e.g. '125, 55 100, 100' for 2 classes"},"mask_delta_A_ratio":{"default":[[]],"type":"array","items":{"enum":null,"type":"string"},"description":"ratio mask offset to allow generation of a bigger object in domain B (for semantic loss) for domain A, format : width (x),height (y) for each class or only one size if square"},"mask_delta_B":{"default":[[]],"type":"array","items":{"enum":null,"type":"string"},"description":"mask offset (in pixels) to allow generation of a bigger object in domain A (for semantic loss) for domain B, format : 'width (x),height (y)' for each class or only one size if square, e.g. 
'125, 55 100, 100' for 2 classes"},"mask_delta_B_ratio":{"default":[[]],"type":"array","items":{"enum":null,"type":"string"},"description":"ratio mask offset to allow generation of a bigger object in domain A (for semantic loss) for domain B, format : 'width (x),height (y)' for each class or only one size if square"},"mask_random_offset_A":{"default":[0.0],"type":"array","items":{"enum":null,"type":"string"},"description":"ratio mask size randomization (only to make bigger one) to robustify the image generation in domain A, format : width (x) height (y) or only one size if square"},"mask_random_offset_B":{"default":[0.0],"type":"array","items":{"enum":null,"type":"string"},"description":"mask size randomization (only to make bigger one) to robustify the image generation in domain B, format : width (y) height (x) or only one size if square"},"mask_square_A":{"default":false,"type":"boolean","description":"whether masks should be squared for domain A"},"mask_square_B":{"default":false,"type":"boolean","description":"whether masks should be squared for domain B"},"rand_mask_A":{"default":false,"type":"boolean","description":"Perform task of replacing noised masks by objects"}}},"crop_size":{"default":256,"type":"integer","description":"then crop to this size"},"dataset_mode":{"default":"unaligned","type":"string","description":"chooses how datasets are 
loaded.","enum":["unaligned","unaligned_labeled_cls","unaligned_labeled_mask","self_supervised_labeled_mask","unaligned_labeled_mask_cls","self_supervised_labeled_mask_cls","unaligned_labeled_mask_online","self_supervised_labeled_mask_online","unaligned_labeled_mask_cls_online","self_supervised_labeled_mask_cls_online","aligned","nuplet_unaligned_labeled_mask","temporal_labeled_mask_online","self_supervised_temporal","single","unaligned_labeled_mask_ref","self_supervised_labeled_mask_ref","unaligned_labeled_mask_online_ref","self_supervised_labeled_mask_online_ref"]},"direction":{"default":"AtoB","type":"string","description":"AtoB or BtoA","enum":["AtoB","BtoA"]},"inverted_mask":{"default":false,"type":"boolean","description":"whether to invert the mask, i.e. around the bbox"},"load_size":{"default":286,"type":"integer","description":"scale images to this size"},"max_dataset_size":{"default":1000000000,"type":"integer","description":"Maximum number of samples allowed per dataset. If the dataset directory contains more than max_dataset_size, only a subset is loaded."},"num_threads":{"default":4,"type":"integer","description":"\\# threads for loading data"},"online_context_pixels":{"default":0,"type":"integer","description":"context pixel band around the crop, unused for generation, only for disc "},"online_fixed_mask_size":{"default":-1,"type":"integer","description":"if \\>0, it will be used as fixed bbox size (warning: in dataset resolution ie before resizing) "},"online_select_category":{"default":-1,"type":"integer","description":"category to select for bounding boxes, -1 means all boxes selected"},"online_single_bbox":{"default":false,"type":"boolean","description":"whether to only allow a single bbox per online crop"},"preprocess":{"default":"resize_and_crop","type":"string","description":"scaling and cropping of images at load 
time","enum":["resize_and_crop","crop","scale_width","scale_width_and_crop","none"]},"refined_mask":{"default":false,"type":"boolean","description":"whether to use refined mask with sam"},"relative_paths":{"default":false,"type":"boolean","description":"whether paths to images are relative to dataroot"},"sanitize_paths":{"default":false,"type":"boolean","description":"if true, wrong images or labels paths will be removed before training"},"serial_batches":{"default":false,"type":"boolean","description":"if true, takes images in order to make batches, otherwise takes them randomly"},"temporal_frame_step":{"default":30,"type":"integer","description":"how many frames between successive frames selected"},"temporal_num_common_char":{"default":-1,"type":"integer","description":"how many characters (the first ones) are used to identify a video; if =-1 natural sorting is used "},"temporal_number_frames":{"default":5,"type":"integer","description":"how many successive frames use for temporal loader"}}},"f_s":{"title":"Semantic segmentation network","type":"object","properties":{"all_classes_as_one":{"default":false,"type":"boolean","description":"if true, all classes will be considered as the same one (ie foreground vs background)"},"class_weights":{"default":[],"type":"array","items":{"enum":null,"type":"string"},"description":"class weights for imbalanced semantic classes"},"config_segformer":{"default":"models/configs/segformer/segformer_config_b0.json","type":"string","description":"path to segformer configuration file for f_s"},"dropout":{"default":false,"type":"boolean","description":"dropout for the semantic network"},"net":{"default":"vgg","type":"string","description":"specify f_s network [vgg|unet|segformer|sam]","enum":["vgg","unet","segformer","sam"]},"nf":{"default":64,"type":"integer","description":"\\# of filters in the first conv layer of classifier"},"semantic_nclasses":{"default":2,"type":"integer","description":"number of classes of the semantic loss 
classifier"},"semantic_threshold":{"default":1.0,"type":"number","description":"threshold of the semantic classifier loss below which semantic loss is applied"},"weight_sam":{"default":"","type":"string","description":"path to sam weight for f_s, e.g. models/configs/sam/pretrain/sam_vit_b_01ec64.pth, or models/configs/sam/pretrain/mobile_sam.pt for MobileSAM"},"weight_segformer":{"default":"","type":"string","description":"path to segformer weight for f_s, e.g. models/configs/segformer/pretrain/segformer_mit-b0.pth"}}},"cls":{"title":"Semantic classification network","type":"object","properties":{"all_classes_as_one":{"default":false,"type":"boolean","description":"if true, all classes will be considered as the same one (ie foreground vs background)"},"class_weights":{"default":[],"type":"array","items":{"enum":null,"type":"string"},"description":"class weights for imbalanced semantic classes"},"config_segformer":{"default":"models/configs/segformer/segformer_config_b0.json","type":"string","description":"path to segformer configuration file for cls"},"dropout":{"default":false,"type":"boolean","description":"dropout for the semantic network"},"net":{"default":"vgg","type":"string","description":"specify cls network [vgg|unet|segformer]","enum":["vgg","unet","segformer"]},"nf":{"default":64,"type":"integer","description":"\\# of filters in the first conv layer of classifier"},"semantic_nclasses":{"default":2,"type":"integer","description":"number of classes of the semantic loss classifier"},"semantic_threshold":{"default":1.0,"type":"number","description":"threshold of the semantic classifier loss below which semantic loss is applied"},"weight_segformer":{"default":"","type":"string","description":"path to segformer weight for cls, e.g. 
models/configs/segformer/pretrain/segformer_mit-b0.pth"}}},"output":{"title":"Output","type":"object","properties":{"display":{"title":"Visdom display","type":"object","properties":{"G_attention_masks":{"default":false,"type":"boolean","description":""},"aim_port":{"default":53800,"type":"integer","description":"aim port of the web display"},"aim_server":{"default":"http://localhost","type":"string","description":"aim server of the web display"},"diff_fake_real":{"default":false,"type":"boolean","description":"if True x - G(x) is displayed"},"env":{"default":"","type":"string","description":"visdom display environment name (default is \"main\")"},"freq":{"default":400,"type":"integer","description":"frequency of showing training results on screen"},"id":{"default":1,"type":"integer","description":"window id of the web display"},"ncols":{"default":0,"type":"integer","description":"if positive, display all images in a single visdom web panel with certain number of images per row.(if == 0 ncols will be computed automatically)"},"networks":{"default":false,"type":"boolean","description":"Set True if you want to display networks on port 8000"},"type":{"default":["visdom"],"type":"array","items":{"enum":null,"type":"string"},"description":"output display, either visdom, aim or no output","enum":["visdom","aim","none"]},"visdom_autostart":{"default":false,"type":"boolean","description":"whether to start a visdom server automatically"},"visdom_port":{"default":8097,"type":"integer","description":"visdom port of the web display"},"visdom_server":{"default":"http://localhost","type":"string","description":"visdom server of the web display"},"winsize":{"default":256,"type":"integer","description":"display window size for both visdom and HTML"}}},"no_html":{"default":false,"type":"boolean","description":"do not save intermediate training results to [opt.checkpoints_dir]/[opt.name]/web/"},"print_freq":{"default":100,"type":"integer","description":"frequency of showing training 
results on console"},"update_html_freq":{"default":1000,"type":"integer","description":"frequency of saving training results to html"},"verbose":{"default":false,"type":"boolean","description":"if specified, print more debugging information"}}},"model":{"title":"Model","type":"object","properties":{"depth_network":{"default":"DPT_Large","type":"string","description":"specify depth prediction network architecture","enum":["DPT_Large","DPT_Hybrid","MiDaS_small","DPT_BEiT_L_512","DPT_BEiT_L_384","DPT_BEiT_B_384","DPT_SwinV2_L_384","DPT_SwinV2_B_384","DPT_SwinV2_T_256","DPT_Swin_L_384","DPT_Next_ViT_L_384","DPT_LeViT_224"]},"init_gain":{"default":0.02,"type":"number","description":"scaling factor for normal, xavier and orthogonal."},"init_type":{"default":"normal","type":"string","description":"network initialization","enum":["normal","xavier","kaiming","orthogonal"]},"input_nc":{"default":3,"type":"integer","description":"\\# of input image channels: 3 for RGB and 1 for grayscale","enum":[1,3]},"multimodal":{"default":false,"type":"boolean","description":"multimodal model with random latent input vector"},"output_nc":{"default":3,"type":"integer","description":"\\# of output image channels: 3 for RGB and 1 for grayscale","enum":[1,3]},"prior_321_backwardcompatibility":{"default":false,"type":"boolean","description":"whether to load models from previous version of JG."},"type_sam":{"default":"mobile_sam","type":"string","description":"which model to use for segment-anything mask generation","enum":["sam","mobile_sam"]}}},"train":{"title":"Training","type":"object","properties":{"sem":{"title":"Semantic training","type":"object","properties":{"cls_B":{"default":false,"type":"boolean","description":"if true cls will be trained not only on domain A but also on domain B"},"cls_lambda":{"default":1.0,"type":"number","description":"weight for semantic class loss"},"cls_pretrained":{"default":false,"type":"boolean","description":"whether to use a pretrained model, available 
for non \"basic\" model only"},"cls_template":{"default":"basic","type":"string","description":"classifier/regressor model type, from torchvision (resnet18, ...), default is custom simple model"},"idt":{"default":false,"type":"boolean","description":"if true apply semantic loss on identity"},"lr_cls":{"default":0.0002,"type":"number","description":"cls learning rate"},"lr_f_s":{"default":0.0002,"type":"number","description":"f_s learning rate"},"mask_lambda":{"default":1.0,"type":"number","description":"weight for semantic mask loss"},"net_output":{"default":false,"type":"boolean","description":"if true apply generator semantic loss on network output for real image rather than on label."},"use_label_B":{"default":false,"type":"boolean","description":"if true domain B has labels too"}}},"mask":{"title":"Semantic training with masks","type":"object","properties":{"charbonnier_eps":{"default":1e-06,"type":"number","description":"Charbonnier loss epsilon value"},"compute_miou":{"default":false,"type":"boolean","description":"whether to compute mIoU on semantic masks prediction"},"disjoint_f_s":{"default":false,"type":"boolean","description":"whether to use a disjoint f_s with the same exact structure"},"f_s_B":{"default":false,"type":"boolean","description":"if true f_s will be trained not only on domain A but also on domain B"},"for_removal":{"default":false,"type":"boolean","description":"if true, object removal mode, domain B images with label 0, cut models only"},"lambda_out_mask":{"default":10.0,"type":"number","description":"weight for loss out mask"},"loss_out_mask":{"default":"L1","type":"string","description":"loss for out mask content (which should not change).","enum":["L1","MSE","Charbonnier"]},"miou_every":{"default":1000,"type":"integer","description":"compute mIoU every n iterations"},"no_train_f_s_A":{"default":false,"type":"boolean","description":"if true f_s won't be trained on domain A"},"out_mask":{"default":false,"type":"boolean","description":"use 
loss out mask"}}},"D_accuracy_every":{"default":1000,"type":"integer","description":"compute D accuracy every N iterations"},"D_lr":{"default":0.0001,"type":"number","description":"discriminator separate learning rate"},"G_ema":{"default":false,"type":"boolean","description":"whether to build G via exponential moving average"},"G_ema_beta":{"default":0.999,"type":"number","description":"exponential decay for ema"},"G_lr":{"default":0.0002,"type":"number","description":"initial learning rate for generator"},"batch_size":{"default":1,"type":"integer","description":"input batch size"},"beta1":{"default":0.9,"type":"number","description":"momentum term of adam"},"beta2":{"default":0.999,"type":"number","description":"momentum term of adam"},"cls_l1_regression":{"default":false,"type":"boolean","description":"if true l1 loss will be used to compute regressor loss"},"cls_regression":{"default":false,"type":"boolean","description":"if true cls will be a regressor and not a classifier"},"compute_D_accuracy":{"default":false,"type":"boolean","description":"whether to compute D accuracy explicitly"},"compute_metrics_test":{"default":false,"type":"boolean","description":"whether to compute test metrics, e.g. FID, ..."},"continue":{"default":false,"type":"boolean","description":"continue training: load the latest model"},"epoch":{"default":"latest","type":"string","description":"which epoch to load? set to latest to use latest cached model"},"epoch_count":{"default":1,"type":"integer","description":"the starting epoch count, we save the model by epoch_count, epoch_count+save_latest_freq, ..."},"export_jit":{"default":false,"type":"boolean","description":"whether to export model in jit format"},"feat_wavelet":{"default":false,"type":"boolean","description":"if true, train in wavelet features space (Note: this may not include all discriminators, when training GANs)"},"gan_mode":{"default":"lsgan","type":"string","description":"the type of GAN objective. 
vanilla GAN loss is the cross-entropy objective used in the original GAN paper.","enum":["vanilla","lsgan","wgangp","projected"]},"iter_size":{"default":1,"type":"integer","description":"backward will be applied each iter_size iterations, it simulates a greater batch size : its value is batch_size\\*iter_size"},"load_iter":{"default":0,"type":"integer","description":"which iteration to load? if load_iter \\> 0, the code will load models by iter_[load_iter]; otherwise, the code will load models by [epoch]"},"lr_decay_iters":{"default":50,"type":"integer","description":"multiply by a gamma every lr_decay_iters iterations"},"lr_policy":{"default":"linear","type":"string","description":"learning rate policy.","enum":["linear","step","plateau","cosine"]},"metrics_every":{"default":1000,"type":"integer","description":"compute metrics every N iterations"},"metrics_list":{"default":["FID"],"type":"array","items":{"enum":null,"type":"string"},"description":"metrics on results quality to compute","enum":["FID","KID","MSID","PSNR","LPIPS"]},"mm_lambda_z":{"default":0.5,"type":"number","description":"weight for random z loss"},"mm_nz":{"default":8,"type":"integer","description":"number of latent vectors"},"n_epochs":{"default":100,"type":"integer","description":"number of epochs with the initial learning rate"},"n_epochs_decay":{"default":100,"type":"integer","description":"number of epochs to linearly decay learning rate to zero"},"nb_img_max_fid":{"default":1000000000,"type":"integer","description":"Maximum number of samples allowed per dataset to compute fid. 
If the dataset directory contains more than nb_img_max_fid, only a subset is used."},"optim":{"default":"adam","type":"string","description":"optimizer (adam, radam, adamw, ...)","enum":["adam","radam","adamw","lion"]},"optim_eps":{"default":1e-08,"type":"number","description":"epsilon for optimizer"},"optim_weight_decay":{"default":0.0,"type":"number","description":"weight decay for optimizer"},"pool_size":{"default":50,"type":"integer","description":"the size of image buffer that stores previously generated images"},"save_by_iter":{"default":false,"type":"boolean","description":"whether saves model by iteration"},"save_epoch_freq":{"default":1,"type":"integer","description":"frequency of saving checkpoints at the end of epochs"},"save_latest_freq":{"default":5000,"type":"integer","description":"frequency of saving the latest results"},"semantic_cls":{"default":false,"type":"boolean","description":"if true semantic class losses will be used"},"semantic_mask":{"default":false,"type":"boolean","description":"if true semantic mask losses will be used"},"temporal_criterion":{"default":false,"type":"boolean","description":"if true, MSE loss will be computed between successive frames"},"temporal_criterion_lambda":{"default":1.0,"type":"number","description":"lambda for MSE loss that will be computed between successive frames"},"use_contrastive_loss_D":{"default":false,"type":"boolean","description":""}}},"dataaug":{"title":"Data augmentation","type":"object","properties":{"APA":{"default":false,"type":"boolean","description":"if true, G will be used as augmentation during D training adaptively to D overfitting between real and fake images"},"APA_every":{"default":4,"type":"integer","description":"How often to perform APA adjustment?"},"APA_nimg":{"default":50,"type":"integer","description":"APA adjustment speed, measured in how many images it takes for p to increase/decrease by one unit."},"APA_p":{"default":0,"type":"integer","description":"initial value of probability 
APA"},"APA_target":{"default":0.6,"type":"number","description":""},"D_diffusion":{"default":false,"type":"boolean","description":"whether to apply diffusion noise augmentation to discriminator inputs, projected discriminator only"},"D_diffusion_every":{"default":4,"type":"integer","description":"How often to perform diffusion augmentation adjustment"},"D_label_smooth":{"default":false,"type":"boolean","description":"whether to use one-sided label smoothing with discriminator"},"D_noise":{"default":0.0,"type":"number","description":"whether to add instance noise to discriminator inputs"},"affine":{"default":0.0,"type":"number","description":"if specified, apply random affine transforms to the images for data augmentation"},"affine_scale_max":{"default":1.2,"type":"number","description":"if random affine specified, max scale range value"},"affine_scale_min":{"default":0.8,"type":"number","description":"if random affine specified, min scale range value"},"affine_shear":{"default":45,"type":"integer","description":"if random affine specified, shear range (0,value)"},"affine_translate":{"default":0.2,"type":"number","description":"if random affine specified, translation range (-value\\*img_size,+value\\*img_size) value"},"diff_aug_policy":{"default":"","type":"string","description":"choose the augmentation policy : color randaffine randperspective. If you want more than one, please write them separated by a comma with no space (e.g. 
color,randaffine)"},"diff_aug_proba":{"default":0.5,"type":"number","description":"proba of using each transformation"},"imgaug":{"default":false,"type":"boolean","description":"whether to apply random image augmentation"},"no_flip":{"default":false,"type":"boolean","description":"if specified, do not flip the images for data augmentation"},"no_rotate":{"default":false,"type":"boolean","description":"if specified, do not rotate the images for data augmentation"}}},"checkpoints_dir":{"default":"./checkpoints","type":"string","description":"models are saved here"},"dataroot":{"default":"None","type":"string","description":"path to images (should have subfolders trainA, trainB, valA, valB, etc)"},"ddp_port":{"default":"12355","type":"string","description":""},"gpu_ids":{"default":"0","type":"string","description":"gpu ids: e.g. 0 0,1,2, 0,2. use -1 for CPU"},"model_type":{"default":"cut","type":"string","description":"chooses which model to use.","enum":["cut","cycle_gan","palette"]},"name":{"default":"experiment_name","type":"string","description":"name of the experiment. 
It decides where to store samples and models"},"phase":{"default":"train","type":"string","description":"train, val, test, etc"},"suffix":{"default":"","type":"string","description":"customized suffix: opt.name = opt.name + suffix: e.g., {model}_{netG}_size{load_size}"},"test_batch_size":{"default":1,"type":"integer","description":"input batch size"},"warning_mode":{"default":false,"type":"boolean","description":"whether to display warning"},"with_amp":{"default":false,"type":"boolean","description":"whether to activate torch amp on forward passes"},"with_tf32":{"default":false,"type":"boolean","description":"whether to activate tf32 for faster computations (Ampere GPU and beyond only)"},"with_torch_compile":{"default":false,"type":"boolean","description":"whether to activate torch.compile for some forward and backward functions (experimental)"}}}},"definitions":{"ServerTrainOptions":{"title":"ServerTrainOptions","type":"object","properties":{"sync":{"title":"Sync","description":"if false, the call returns immediately and train process is executed in the background. If true, the call returns only when training process is finished","default":false,"type":"boolean"}}}}}}},"definitions":{"ServerTrainOptions":{"title":"ServerTrainOptions","type":"object","properties":{"sync":{"title":"Sync","description":"if false, the call returns immediately and train process is executed in the background. If true, the call returns only when training process is finished","default":false,"type":"boolean"}}}}} \ No newline at end of file