Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

训练时一直停在Sanity Val #204

Open
XxsStrongest opened this issue May 29, 2024 · 1 comment
Open

训练时一直停在Sanity Val #204

XxsStrongest opened this issue May 29, 2024 · 1 comment

Comments

@XxsStrongest
Copy link

python tasks/run.py --config=egs/datasets/woman/lm3d_radnerf_sr.yaml --exp_name=woman/may_head --reset

nerf_sr.yaml --exp_name=woman/may_head --reset
| set_hparams Unknow hparams: []
| Hparams chains: ['egs/egs_bases/radnerf/base.yaml', 'egs/egs_bases/radnerf/lm3d_radnerf.yaml', 'egs/datasets/woman/lm3d_radnerf.yaml', 'egs/datasets/woman/lm3d_radnerf_sr.yaml']
| Hparams: {
"accumulate_grad_batches": 1,
"add_eye_blink_cond": true,
"ambient_coord_dim": 3,
"ambient_loss_mode": "mae",
"amp": true,
"base_config": [
"./lm3d_radnerf.yaml"
],
"binary_data_dir": "data/binary/videos",
"bound": 1,
"camera_offset": [
0,
0,
0
],
"camera_scale": 4.0,
"clip_grad_norm": 0.0,
"clip_grad_value": 0,
"cond_dropout_rate": 0.0,
"cond_out_dim": 64,
"cond_type": "idexp_lm3d_normalized",
"cond_win_size": 1,
"cuda_ray": true,
"debug": false,
"density_thresh": 10,
"density_thresh_torso": 0.01,
"desired_resolution": 2048,
"dt_gamma": 0.00390625,
"eval_max_batches": 100,
"exp_name": "woman/may_head",
"eye_blink_dim": 2,
"far": 0.9,
"finetune_lips": true,
"finetune_lips_start_iter": 200000,
"geo_feat_dim": 128,
"grid_interpolation_type": "linear",
"grid_size": 128,
"grid_type": "tiledgrid",
"gui_fovy": 21.24,
"gui_h": 512,
"gui_max_spp": 1,
"gui_radius": 3.35,
"gui_w": 512,
"hidden_dim_ambient": 128,
"hidden_dim_color": 128,
"hidden_dim_sigma": 128,
"individual_embedding_dim": 4,
"individual_embedding_num": 13000,
"infer": false,
"infer_audio_source_name": "",
"infer_bg_img_fname": "",
"infer_c2w_name": "",
"infer_cond_name": "",
"infer_lm3d_clamp_std": 1.5,
"infer_lm3d_lle_percent": 0.25,
"infer_lm3d_smooth_sigma": 0.0,
"infer_out_video_name": "",
"infer_scale_factor": 1.0,
"infer_smo_std": 0.0,
"infer_smooth_camera_path": true,
"infer_smooth_camera_path_kernel_size": 7,
"init_method": "tcp",
"lambda_ambient": null,
"lambda_dual_fm": 0.0,
"lambda_lap_ambient_loss": 0.0,
"lambda_lpips_loss": 0.001,
"lambda_weights_entropy": 0.0001,
"load_ckpt": "",
"load_imgs_to_memory": false,
"log2_hashmap_size": 16,
"lpips_mode": "vgg19_v2",
"lpips_start_iters": 200000,
"lr": 0.0005,
"lr_lambda_ambient": 0.01,
"max_ray_batch": 4096,
"max_steps": 16,
"max_updates": 250000,
"min_near": 0.05,
"n_rays": 65536,
"near": 0.3,
"nerf_keypoint_mode": "lm68",
"not_save_modules": [
"criterion_lpips",
"dual_disc"
],
"num_ckpt_keep": 1,
"num_layers_ambient": 3,
"num_layers_color": 2,
"num_layers_sigma": 3,
"num_sanity_val_steps": 2,
"num_steps": 16,
"num_valid_plots": 5,
"optimizer_adam_beta1": 0.9,
"optimizer_adam_beta2": 0.999,
"polygon_face_mask": true,
"print_nan_grads": false,
"processed_data_dir": "data/processed/videos",
"raw_data_dir": "data/raw/videos",
"resume_from_checkpoint": 0,
"save_best": true,
"save_codes": [
"tasks",
"modules",
"egs"
],
"save_gt": true,
"scheduler": "exponential",
"seed": 9999,
"smo_win_size": 3,
"smooth_lips": false,
"sr_start_iters": 0,
"start_rank": 0,
"target_ambient_loss": 1e-08,
"task_cls": "tasks.radnerfs.radnerf_sr.RADNeRFTask",
"tb_log_interval": 100,
"torso_head_aware": false,
"torso_individual_embedding_dim": 8,
"torso_shrink": 0.8,
"update_extra_interval": 16,
"upsample_steps": 0,
"use_window_cond": true,
"val_check_interval": 2000,
"valid_infer_interval": 10000,
"valid_monitor_key": "val_loss",
"valid_monitor_mode": "min",
"validate": false,
"video_id": "woman",
"warmup_updates": 0,
"weight_decay": 0,
"with_att": true,
"with_sr": true,
"work_dir": "checkpoints/woman/may_head",
"world_size": -1,
"zero_dummy": true
}
05/29 03:37:08 PM GPU available: True, GPU used: [0], world_size: 1, multi-machine training: False
/data/workpace/GeneFacePlusPlus/tasks/radnerfs/dataset_utils.py:266: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
self.lm68s = torch.tensor(self.lm2ds[:, index_lm68_from_lm478, :])
val: Smooth head trajectory (rotation and translation) with a window size of 7
| Copied codes to checkpoints/woman/may_head/codes/20240529153717.
| cond_prenet Trainable Parameters: 0.050M
| blink_embedding Trainable Parameters: 0.000M
| blink_encoder Trainable Parameters: 0.001M
| cond_att_net Trainable Parameters: 0.004M
| position_embedder Trainable Parameters: 1.807M
| ambient_net Trainable Parameters: 0.029M
| ambient_embedder Trainable Parameters: 1.807M
| sigma_net Trainable Parameters: 0.041M
| direction_embedder Trainable Parameters: 0.000M
| color_net Trainable Parameters: 0.019M
| dropout Trainable Parameters: 0.000M
| sr_net Trainable Parameters: 0.271M
Sanity Val: 0%| | 0/2 [00:00<?, ?step/s]/data/workpace/GeneFacePlusPlus/tasks/radnerfs/dataset_utils.py:427: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
sample['lm68'] = torch.tensor(self.lm68s[idx].reshape([68*2]))

训练时一直卡在这个地方不动，请问有大佬知道问题原因吗？

@raymondren1982
Copy link

去安装一下 rsync 试试

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

2 participants