Commit 866f0e7

fix errors in DeepLab-V2

qjadud1994 committed Sep 1, 2021
1 parent d3e8a7b
Showing 3 changed files with 90 additions and 79 deletions.
2 changes: 1 addition & 1 deletion DeepLab-V2-PyTorch/main.py
mode changed 100755 → 100644
@@ -544,4 +544,4 @@ def process(i):
 
 if __name__ == "__main__":
 
-    main()
+    main()
137 changes: 74 additions & 63 deletions DeepLab-V2-PyTorch/main_v2.py → DeepLab-V2-PyTorch/train.py
Expand Up @@ -26,7 +26,6 @@
from libs.utils import PolynomialLR
from libs.utils.stream_metrics import StreamSegMetrics, AverageMeter


def get_argparser():
parser = argparse.ArgumentParser()

@@ -148,13 +147,13 @@ def main():
         dataset=train_dataset,
         batch_size=CONFIG.SOLVER.BATCH_SIZE.TRAIN,
         num_workers=CONFIG.DATALOADER.NUM_WORKERS,
-        shuffle=True,
+        shuffle=True, pin_memory=True, drop_last=True,
     )
     valid_loader = torch.utils.data.DataLoader(
         dataset=valid_dataset,
         batch_size=CONFIG.SOLVER.BATCH_SIZE.TEST,
         num_workers=CONFIG.DATALOADER.NUM_WORKERS,
-        shuffle=False,
+        shuffle=False, pin_memory=True,
     )
 
     # Model check
@@ -222,30 +221,40 @@ def main():
     makedirs(checkpoint_dir)
     print("Checkpoint dst:", checkpoint_dir)
 
-    model.train()
+    def set_train(model):
+        model.train()
+        model.module.base.freeze_bn()
 
     metrics = StreamSegMetrics(CONFIG.DATASET.N_CLASSES)
 
     scaler = torch.cuda.amp.GradScaler(enabled=opts.amp)
     avg_loss = AverageMeter()
     avg_time = AverageMeter()
 
-    curr_iter = 0
+    set_train(model)
     best_score = 0
     end_time = time.time()
 
-    while True:
-        for _, images, labels, cls_labels in train_loader:
-            curr_iter += 1
-            loss = 0
-
-            optimizer.zero_grad()
-
+    for iteration in range(1, CONFIG.SOLVER.ITER_MAX + 1):
+        # Clear gradients (ready to accumulate)
+        optimizer.zero_grad()
+
+        loss = 0
+        for _ in range(CONFIG.SOLVER.ITER_SIZE):
+            try:
+                _, images, labels, cls_labels = next(train_loader_iter)
+            except:
+                train_loader_iter = iter(train_loader)
+                _, images, labels, cls_labels = next(train_loader_iter)
+                avg_loss.reset()
+                avg_time.reset()
 
             with torch.cuda.amp.autocast(enabled=opts.amp):
                 # Propagate forward
-                logits = model(images.to(device))
+                logits = model(images.to(device, non_blocking=True))
 
                 # Loss
+                iter_loss = 0
                 for logit in logits:
                     # Resize labels for {100%, 75%, 50%, Max} logits
                     _, _, H, W = logit.shape
@@ -254,62 +263,64 @@ def main():
                     pseudo_labels = logit.detach() * cls_labels[:, :, None, None].to(device)
                     pseudo_labels = pseudo_labels.argmax(dim=1)
 
-                    _loss = criterion(logit, labels_.to(device)) + criterion(logit, pseudo_labels)
-
-                    loss += _loss
+                    _loss = criterion(logit, labels_.to(device, )) + criterion(logit, pseudo_labels)
+
+                    iter_loss += _loss
 
-            # Propagate backward (just compute gradients wrt the loss)
-            loss = (loss / len(logits))
+                iter_loss /= CONFIG.SOLVER.ITER_SIZE
 
-            scaler.scale(loss).backward()
-            scaler.step(optimizer)
-            scaler.update()
+            scaler.scale(iter_loss).backward()
+            loss += iter_loss.item()
 
-            # Update learning rate
-            scheduler.step()
-            avg_loss.update(loss.item())
-            avg_time.update(time.time() - end_time)
-            end_time = time.time()
+        # Update weights with accumulated gradients
+        scaler.step(optimizer)
+        scaler.update()
+
+        # Update learning rate
+        scheduler.step(epoch=iteration)
+
+        avg_loss.update(loss)
+        avg_time.update(time.time() - end_time)
+        end_time = time.time()
 
-            # TensorBoard
-            if curr_iter % 10 == 0:
-                print(" Itrs %d/%d, Loss=%6f, Time=%.2f , LR=%.8f" %
-                      (curr_iter, CONFIG.SOLVER.ITER_MAX,
-                       avg_loss.avg, avg_time.avg*1000, optimizer.param_groups[0]['lr']))
+        # TensorBoard
+        if iteration % 100 == 0:
+            print(" Itrs %d/%d, Loss=%6f, Time=%.2f , LR=%.8f" %
+                  (iteration, CONFIG.SOLVER.ITER_MAX,
+                   avg_loss.avg, avg_time.avg*1000, optimizer.param_groups[0]['lr']))
 
-            # validation
-            if curr_iter % opts.val_interval == 0:
-                print("... validation")
-                metrics.reset()
-                with torch.no_grad():
-                    for _, images, labels, _ in valid_loader:
-                        images = images.to(device)
-
-                        # Forward propagation
-                        logits = model(images)
-
-                        # Pixel-wise labeling
-                        _, H, W = labels.shape
-                        logits = F.interpolate(logits, size=(H, W),
-                                               mode="bilinear", align_corners=False)
-                        preds = torch.argmax(logits, dim=1).cpu().numpy()
-                        targets = labels.cpu().numpy()
-
-                        metrics.update(targets, preds)
-
-                score = metrics.get_results()
-                print(metrics.to_str(score))
-
-                if score['Mean IoU'] > best_score:  # save best model
-                    best_score = score['Mean IoU']
-                    torch.save(
-                        model.module.state_dict(), os.path.join(checkpoint_dir, "checkpoint_best.pth")
-                    )
-
-            if curr_iter > CONFIG.SOLVER.ITER_MAX:
-                return
+        # validation
+        if iteration % opts.val_interval == 0:
+            print("... validation")
+            model.eval()
+            metrics.reset()
+            with torch.no_grad():
+                for _, images, labels, _ in valid_loader:
+                    images = images.to(device, non_blocking=True)
+
+                    # Forward propagation
+                    logits = model(images)
+
+                    # Pixel-wise labeling
+                    _, H, W = labels.shape
+                    logits = F.interpolate(logits, size=(H, W),
+                                           mode="bilinear", align_corners=False)
+                    preds = torch.argmax(logits, dim=1).cpu().numpy()
+                    targets = labels.cpu().numpy()
+                    metrics.update(targets, preds)
+
+            set_train(model)
+            score = metrics.get_results()
+            print(metrics.to_str(score))
+
+            if score['Mean IoU'] > best_score:  # save best model
+                best_score = score['Mean IoU']
+                torch.save(
+                    model.module.state_dict(), os.path.join(checkpoint_dir, "checkpoint_best.pth")
+                )
 
 
 if __name__ == "__main__":
 
     main()
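
Two fixes are worth calling out in this rewrite. First, validation now switches the model to eval() and back through the new set_train() helper, which also re-freezes the BatchNorm layers (model.module.base.freeze_bn()); the old loop validated in training mode. Second, the open-ended while True epoch loop is replaced by an iteration-counted loop that accumulates gradients over CONFIG.SOLVER.ITER_SIZE mini-batches before each optimizer step, cycling the DataLoader by hand at epoch boundaries. Below is a minimal, self-contained sketch of that accumulate-then-step pattern under AMP; the toy model, loader, iter_size, and iter_max are illustrative stand-ins, not code from this repository.

import torch

# Stand-ins so the sketch runs on its own; the real script uses
# DeepLab-V2, the VOC loaders, and CONFIG.SOLVER.ITER_SIZE.
device = "cuda" if torch.cuda.is_available() else "cpu"
model = torch.nn.Linear(8, 2).to(device)
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
loader = torch.utils.data.DataLoader(
    torch.utils.data.TensorDataset(torch.randn(32, 8), torch.randint(0, 2, (32,))),
    batch_size=4, shuffle=True, drop_last=True,
)

use_amp = device == "cuda"          # GradScaler/autocast are no-ops when disabled
scaler = torch.cuda.amp.GradScaler(enabled=use_amp)
loader_iter = iter(loader)
iter_size, iter_max = 2, 10         # 2 mini-batches accumulated per weight update

for iteration in range(1, iter_max + 1):
    optimizer.zero_grad()           # clear once per *effective* batch
    for _ in range(iter_size):
        try:
            images, labels = next(loader_iter)
        except StopIteration:       # epoch boundary: restart the loader
            loader_iter = iter(loader)
            images, labels = next(loader_iter)
        with torch.cuda.amp.autocast(enabled=use_amp):
            loss = criterion(model(images.to(device)), labels.to(device))
        # Dividing by iter_size makes the accumulated gradient an average
        # rather than a sum; backward() adds onto the existing .grad buffers.
        scaler.scale(loss / iter_size).backward()
    scaler.step(optimizer)          # one optimizer step per iter_size batches
    scaler.update()

The division mirrors the iter_loss /= CONFIG.SOLVER.ITER_SIZE line in the diff. The sketch catches StopIteration explicitly; the committed code uses a bare except, which also covers the NameError on the first pass, since train_loader_iter is created lazily.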
30 changes: 15 additions & 15 deletions DeepLab-V2-PyTorch/train.sh
@@ -1,20 +1,20 @@
 # Training DeepLab-V2 using pseudo segmentation labels
 
-#DATASET=voc12_2gpu
-#LOG_DIR=Deeplabv2_pseudo_segmentation_labels_2gpu
-#GT_DIR=refined_pseudo_segmentation_labels
-#CUDA_VISIBLE_DEVICES=1,2 python main.py train -c configs/${DATASET}.yaml --gt_path=${GT_DIR} --log_dir=${LOG_DIR}
+DATASET=voc12
+CONFIG=configs/voc12_2gpu.yaml
+LOG_DIR=Deeplabv2_pseudo_segmentation_labels_2gpu
+GT_DIR=refined_pseudo_segmentation_labels
 
-#DATASET=voc12_4gpu
-#LOG_DIR=Deeplabv2_pseudo_segmentation_labels_4gpu
-#GT_DIR=refined_pseudo_segmentation_labels
-#CUDA_VISIBLE_DEVICES=0,1,2,3 python main.py train -c configs/${DATASET}.yaml --gt_path=${GT_DIR} --log_dir=${LOG_DIR}
+#---------------------------------------------------------------------------------------------------------------------
+# Training DeepLab-V2 using pseudo segmentation labels
+CUDA_VISIBLE_DEVICES=0,1 python train.py --config_path ${CONFIG} --gt_path ${GT_DIR} --log_dir ${LOG_DIR}
 
-CONFIG=configs/voc12_2gpu.yaml
-LOG_DIR=Deeplabv2_new
-GT_DIR=refined_pseudo_segmentation_labels
-
-CUDA_VISIBLE_DEVICES=0 python main_v2.py --config_path ${CONFIG} --gt_path ${GT_DIR} --log_dir ${LOG_DIR}
+# evaluation
+CUDA_VISIBLE_DEVICES=0 python main.py test \
+    -c configs/${DATASET}.yaml \
+    -m data/models/${LOG_DIR}/deeplabv2_resnet101_msc/*/checkpoint_final.pth \
+    --log_dir=${LOG_DIR}
+
+# evaluate the model with CRF post-processing
+CUDA_VISIBLE_DEVICES=0 python main.py crf \
+    -c configs/${DATASET}.yaml \
+    --log_dir=${LOG_DIR}
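
One detail to flag in the updated script: train.py saves its best-scoring weights as checkpoint_best.pth, while the evaluation step above loads checkpoint_final.pth. If main.py's test stage does not itself write checkpoint_final.pth, the -m path may need to point at the best checkpoint instead; a hypothetical adjustment:

# hypothetical: evaluate the best checkpoint written by train.py
CUDA_VISIBLE_DEVICES=0 python main.py test \
    -c configs/${DATASET}.yaml \
    -m data/models/${LOG_DIR}/deeplabv2_resnet101_msc/*/checkpoint_best.pth \
    --log_dir=${LOG_DIR}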
