diff --git a/pytorch/data.py b/pytorch/data.py
new file mode 100644
index 0000000..d1e186e
--- /dev/null
+++ b/pytorch/data.py
@@ -0,0 +1,87 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+@Author: Yue Wang
+@Contact: yuewangx@mit.edu
+@File: data.py
+@Time: 2018/10/13 6:21 PM
+"""
+
+
+import os
+import glob
+import h5py
+import numpy as np
+from torch.utils.data import Dataset
+
+
+def download():
+    # Fetch and unpack ModelNet40 (2048 points per shape, HDF5) on first use.
+    BASE_DIR = os.path.dirname(os.path.abspath(__file__))
+    DATA_DIR = os.path.join(BASE_DIR, 'data')
+    if not os.path.exists(DATA_DIR):
+        os.mkdir(DATA_DIR)
+    if not os.path.exists(os.path.join(DATA_DIR, 'modelnet40_ply_hdf5_2048')):
+        www = 'https://shapenet.cs.stanford.edu/media/modelnet40_ply_hdf5_2048.zip'
+        zipfile_name = os.path.basename(www)
+        os.system('wget %s; unzip %s' % (www, zipfile_name))
+        os.system('mv %s %s' % (zipfile_name[:-4], DATA_DIR))
+        os.system('rm %s' % (zipfile_name))
+
+
+def load_data(partition):
+    download()
+    BASE_DIR = os.path.dirname(os.path.abspath(__file__))
+    DATA_DIR = os.path.join(BASE_DIR, 'data')
+    all_data = []
+    all_label = []
+    for h5_name in glob.glob(os.path.join(DATA_DIR, 'modelnet40_ply_hdf5_2048', 'ply_data_%s*.h5' % partition)):
+        f = h5py.File(h5_name, 'r')
+        data = f['data'][:].astype('float32')
+        label = f['label'][:].astype('int64')
+        f.close()
+        all_data.append(data)
+        all_label.append(label)
+    all_data = np.concatenate(all_data, axis=0)
+    all_label = np.concatenate(all_label, axis=0)
+    return all_data, all_label
+
+
+def translate_pointcloud(pointcloud):
+    # Train-time augmentation: random anisotropic scaling followed by a
+    # random global translation.
+    xyz1 = np.random.uniform(low=2./3., high=3./2., size=[3])
+    xyz2 = np.random.uniform(low=-0.2, high=0.2, size=[3])
+
+    translated_pointcloud = np.add(np.multiply(pointcloud, xyz1), xyz2).astype('float32')
+    return translated_pointcloud
+
+
+def jitter_pointcloud(pointcloud, sigma=0.01, clip=0.02):
+    # Add clipped Gaussian noise to every point coordinate.
+    N, C = pointcloud.shape
+    pointcloud += np.clip(sigma * np.random.randn(N, C), -1*clip, clip)
+    return pointcloud
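+
+
+# NOTE (editor): jitter_pointcloud above is defined but never called by
+# ModelNet40.__getitem__ below. A minimal opt-in sketch (an assumption, not
+# part of the original patch), applied alongside the existing augmentation:
+#
+#     if self.partition == 'train':
+#         pointcloud = translate_pointcloud(pointcloud)
+#         pointcloud = jitter_pointcloud(pointcloud)
+#         np.random.shuffle(pointcloud)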
+
+
+class ModelNet40(Dataset):
+    def __init__(self, num_points, partition='train'):
+        self.data, self.label = load_data(partition)
+        self.num_points = num_points
+        self.partition = partition
+
+    def __getitem__(self, item):
+        pointcloud = self.data[item][:self.num_points]
+        label = self.label[item]
+        if self.partition == 'train':
+            pointcloud = translate_pointcloud(pointcloud)
+            np.random.shuffle(pointcloud)   # random point permutation
+        return pointcloud, label
+
+    def __len__(self):
+        return self.data.shape[0]
+
+
+if __name__ == '__main__':
+    train = ModelNet40(1024)
+    test = ModelNet40(1024, 'test')
+    for data, label in train:
+        print(data.shape)
+        print(label.shape)
diff --git a/pytorch/main.py b/pytorch/main.py
new file mode 100644
index 0000000..3f45f26
--- /dev/null
+++ b/pytorch/main.py
@@ -0,0 +1,227 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+@Author: Yue Wang
+@Contact: yuewangx@mit.edu
+@File: main.py
+@Time: 2018/10/13 10:39 PM
+"""
+
+
+from __future__ import print_function
+import os
+import argparse
+import torch
+import torch.nn as nn
+import torch.optim as optim
+from torch.optim.lr_scheduler import CosineAnnealingLR
+from data import ModelNet40
+from model import PointNet, DGCNN
+import numpy as np
+from torch.utils.data import DataLoader
+from util import cal_loss, IOStream
+import sklearn.metrics as metrics
+
+
+def _init_():
+    # Create the experiment directory and back up the source files.
+    if not os.path.exists('checkpoints'):
+        os.makedirs('checkpoints')
+    if not os.path.exists('checkpoints/' + args.exp_name):
+        os.makedirs('checkpoints/' + args.exp_name)
+    if not os.path.exists('checkpoints/' + args.exp_name + '/' + 'models'):
+        os.makedirs('checkpoints/' + args.exp_name + '/' + 'models')
+    os.system('cp main.py checkpoints' + '/' + args.exp_name + '/' + 'main.py.backup')
+    os.system('cp model.py checkpoints' + '/' + args.exp_name + '/' + 'model.py.backup')
+    os.system('cp util.py checkpoints' + '/' + args.exp_name + '/' + 'util.py.backup')
+    os.system('cp data.py checkpoints' + '/' + args.exp_name + '/' + 'data.py.backup')
+
+
+def train(args, io):
+    train_loader = DataLoader(ModelNet40(partition='train', num_points=args.num_points), num_workers=8,
+                              batch_size=args.batch_size, shuffle=True, drop_last=True)
+    test_loader = DataLoader(ModelNet40(partition='test', num_points=args.num_points), num_workers=8,
+                             batch_size=args.test_batch_size, shuffle=True, drop_last=False)
+
+    device = torch.device("cuda" if args.cuda else "cpu")
+
+    # Try to load models
+    if args.model == 'pointnet':
+        model = PointNet(args).to(device)
+    elif args.model == 'dgcnn':
+        model = DGCNN(args).to(device)
+    else:
+        raise Exception("Not implemented")
+    print(str(model))
+
+    model = nn.DataParallel(model)
+    print("Let's use", torch.cuda.device_count(), "GPUs!")
+
+    if args.use_sgd:
+        print("Use SGD")
+        # SGD uses a larger base rate; the cosine schedule anneals it down to args.lr.
+        opt = optim.SGD(model.parameters(), lr=args.lr*100, momentum=args.momentum, weight_decay=1e-4)
+    else:
+        print("Use Adam")
+        opt = optim.Adam(model.parameters(), lr=args.lr, weight_decay=1e-4)
+
+    scheduler = CosineAnnealingLR(opt, args.epochs, eta_min=args.lr)
+
+    criterion = cal_loss
+
+    best_test_acc = 0
+    for epoch in range(args.epochs):
+        ####################
+        # Train
+        ####################
+        train_loss = 0.0
+        count = 0.0
+        model.train()
+        train_pred = []
+        train_true = []
+        for data, label in train_loader:
+            data, label = data.to(device), label.to(device).squeeze()
+            data = data.permute(0, 2, 1)
+            batch_size = data.size()[0]
+            opt.zero_grad()
+            logits = model(data)
+            loss = criterion(logits, label)
+            loss.backward()
+            opt.step()
+            preds = logits.max(dim=1)[1]
+            count += batch_size
+            train_loss += loss.item() * batch_size
+            train_true.append(label.cpu().numpy())
+            train_pred.append(preds.detach().cpu().numpy())
+        # Step the scheduler once per epoch, after the optimizer updates.
+        scheduler.step()
+        train_true = np.concatenate(train_true)
+        train_pred = np.concatenate(train_pred)
+        outstr = 'Train %d, loss: %.6f, train acc: %.6f, train avg acc: %.6f' % (epoch,
+                                                                                 train_loss*1.0/count,
+                                                                                 metrics.accuracy_score(
+                                                                                     train_true, train_pred),
+                                                                                 metrics.balanced_accuracy_score(
+                                                                                     train_true, train_pred))
+        io.cprint(outstr)
+
+        ####################
+        # Test
+        ####################
+        test_loss = 0.0
+        count = 0.0
+        model.eval()
+        test_pred = []
+        test_true = []
+        for data, label in test_loader:
+            data, label = data.to(device), label.to(device).squeeze()
+            data = data.permute(0, 2, 1)
+            batch_size = data.size()[0]
+            logits = model(data)
+            loss = criterion(logits, label)
+            preds = logits.max(dim=1)[1]
+            count += batch_size
+            test_loss += loss.item() * batch_size
+            test_true.append(label.cpu().numpy())
+            test_pred.append(preds.detach().cpu().numpy())
+        test_true = np.concatenate(test_true)
+        test_pred = np.concatenate(test_pred)
+        test_acc = metrics.accuracy_score(test_true, test_pred)
+        avg_per_class_acc = metrics.balanced_accuracy_score(test_true, test_pred)
+        outstr = 'Test %d, loss: %.6f, test acc: %.6f, test avg acc: %.6f' % (epoch,
+                                                                              test_loss*1.0/count,
+                                                                              test_acc,
+                                                                              avg_per_class_acc)
+        io.cprint(outstr)
+        if test_acc >= best_test_acc:
+            best_test_acc = test_acc
+            torch.save(model.state_dict(), 'checkpoints/%s/models/model.t7' % args.exp_name)
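+
+
+# NOTE (editor): the checkpoint saved above holds DataParallel-wrapped weights
+# (state-dict keys carry a 'module.' prefix), so it must be restored into a
+# DataParallel-wrapped model, as test() does below. A minimal sketch:
+#
+#     model = nn.DataParallel(DGCNN(args).to(device))
+#     model.load_state_dict(torch.load('checkpoints/%s/models/model.t7' % args.exp_name))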
+
+
+def test(args, io):
+    test_loader = DataLoader(ModelNet40(partition='test', num_points=args.num_points),
+                             batch_size=args.test_batch_size, shuffle=True, drop_last=False)
+
+    device = torch.device("cuda" if args.cuda else "cpu")
+
+    # Try to load models
+    if args.model == 'pointnet':
+        model = PointNet(args).to(device)
+    elif args.model == 'dgcnn':
+        model = DGCNN(args).to(device)
+    else:
+        raise Exception("Not implemented")
+    model = nn.DataParallel(model)
+    model.load_state_dict(torch.load(args.model_path))
+    model = model.eval()
+    test_true = []
+    test_pred = []
+    for data, label in test_loader:
+        data, label = data.to(device), label.to(device).squeeze()
+        data = data.permute(0, 2, 1)
+        logits = model(data)
+        preds = logits.max(dim=1)[1]
+        test_true.append(label.cpu().numpy())
+        test_pred.append(preds.detach().cpu().numpy())
+    test_true = np.concatenate(test_true)
+    test_pred = np.concatenate(test_pred)
+    test_acc = metrics.accuracy_score(test_true, test_pred)
+    avg_per_class_acc = metrics.balanced_accuracy_score(test_true, test_pred)
+    outstr = 'Test :: test acc: %.6f, test avg acc: %.6f' % (test_acc, avg_per_class_acc)
+    io.cprint(outstr)
+
+
+if __name__ == "__main__":
+    # Training settings
+    parser = argparse.ArgumentParser(description='Point Cloud Recognition')
+    parser.add_argument('--exp_name', type=str, default='exp', metavar='N',
+                        help='Name of the experiment')
+    parser.add_argument('--model', type=str, default='dgcnn', metavar='N',
+                        choices=['pointnet', 'dgcnn'],
+                        help='Model to use, [pointnet, dgcnn]')
+    parser.add_argument('--dataset', type=str, default='modelnet40', metavar='N',
+                        choices=['modelnet40'])
+    parser.add_argument('--batch_size', type=int, default=32, metavar='batch_size',
+                        help='Size of batch')
+    parser.add_argument('--test_batch_size', type=int, default=16, metavar='batch_size',
+                        help='Size of batch')
+    parser.add_argument('--epochs', type=int, default=250, metavar='N',
+                        help='number of epochs to train')
+    # argparse's type=bool treats any non-empty string as True, so boolean
+    # flags are parsed explicitly.
+    parser.add_argument('--use_sgd', type=lambda x: str(x).lower() in ('true', '1'), default=True,
+                        help='Use SGD')
+    parser.add_argument('--lr', type=float, default=0.001, metavar='LR',
+                        help='learning rate (default: 0.001, 0.1 if using sgd)')
+    parser.add_argument('--momentum', type=float, default=0.9, metavar='M',
+                        help='SGD momentum (default: 0.9)')
+    parser.add_argument('--no_cuda', action='store_true', default=False,
+                        help='disables CUDA training')
+    parser.add_argument('--seed', type=int, default=1, metavar='S',
+                        help='random seed (default: 1)')
+    parser.add_argument('--eval', action='store_true', default=False,
+                        help='evaluate the model')
+    parser.add_argument('--num_points', type=int, default=1024,
+                        help='num of points to use')
+    parser.add_argument('--dropout', type=float, default=0.5,
+                        help='dropout rate')
+    parser.add_argument('--emb_dims', type=int, default=1024, metavar='N',
+                        help='Dimension of embeddings')
+    parser.add_argument('--k', type=int, default=20, metavar='N',
+                        help='Num of nearest neighbors to use')
+    parser.add_argument('--model_path', type=str, default='', metavar='N',
+                        help='Pretrained model path')
+    args = parser.parse_args()
+
+    _init_()
+
+    io = IOStream('checkpoints/' + args.exp_name + '/run.log')
+    io.cprint(str(args))
+
+    args.cuda = not args.no_cuda and torch.cuda.is_available()
+    torch.manual_seed(args.seed)
+    if args.cuda:
+        io.cprint(
+            'Using GPU : ' + str(torch.cuda.current_device()) + ' from ' + str(torch.cuda.device_count()) + ' devices')
+        torch.cuda.manual_seed(args.seed)
+    else:
+        io.cprint('Using CPU')
+
+    if not args.eval:
+        train(args, io)
+    else:
+        test(args, io)
diff --git a/pytorch/model.py b/pytorch/model.py
new file mode 100644
index 0000000..b718591
--- /dev/null
+++ b/pytorch/model.py
@@ -0,0 +1,153 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+@Author: Yue Wang
+@Contact: yuewangx@mit.edu
+@File: model.py
+@Time: 2018/10/13 6:35 PM
+"""
+
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+
+def knn(x, k):
+    # x: (batch_size, num_dims, num_points). pairwise_distance holds the
+    # negated squared Euclidean distances, so topk picks the nearest points.
+    inner = -2*torch.matmul(x.transpose(2, 1), x)
+    xx = torch.sum(x**2, dim=1, keepdim=True)
+    pairwise_distance = -xx - inner - xx.transpose(2, 1)
+
+    idx = pairwise_distance.topk(k=k, dim=-1)[1]   # (batch_size, num_points, k)
+    return idx
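+
+
+# NOTE (editor): knn relies on the expansion
+# -||x_i - x_j||^2 = 2*x_i.x_j - ||x_i||^2 - ||x_j||^2. A quick
+# self-contained check of the identity (illustrative only):
+#
+#     x = torch.randn(2, 3, 5)                      # (batch, dims, points)
+#     d_true = ((x.unsqueeze(-1) - x.unsqueeze(-2)) ** 2).sum(dim=1)
+#     xx = (x ** 2).sum(dim=1, keepdim=True)
+#     d_fast = xx.transpose(2, 1) + xx - 2 * x.transpose(2, 1) @ x
+#     assert torch.allclose(d_true, d_fast, atol=1e-5)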
+
+
+def get_graph_feature(x, k=20, idx=None):
+    batch_size = x.size(0)
+    num_points = x.size(2)
+    x = x.view(batch_size, -1, num_points)
+    if idx is None:
+        idx = knn(x, k=k)   # (batch_size, num_points, k)
+    device = x.device
+
+    # Offset the per-cloud neighbour indices so they address rows of the
+    # flattened (batch_size*num_points, num_dims) view below.
+    idx_base = torch.arange(0, batch_size, device=device).view(-1, 1, 1)*num_points
+
+    idx = idx + idx_base
+
+    idx = idx.view(-1)
+
+    _, num_dims, _ = x.size()
+
+    x = x.transpose(2, 1).contiguous()   # (batch_size, num_points, num_dims)
+    feature = x.view(batch_size*num_points, -1)[idx, :]
+    feature = feature.view(batch_size, num_points, k, num_dims)
+    x = x.view(batch_size, num_points, 1, num_dims).repeat(1, 1, k, 1)
+
+    # Edge feature: concatenate each neighbour with its centre point.
+    feature = torch.cat((feature, x), dim=3).permute(0, 3, 1, 2)
+
+    return feature
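+
+
+# NOTE (editor): get_graph_feature returns a tensor of shape
+# (batch_size, 2*num_dims, num_points, k): for each point x_i and each of its
+# k neighbours x_j, the edge feature is the concatenation [x_j, x_i]. That is
+# why the first DGCNN convolution below takes 6 = 2*3 input channels, and each
+# later EdgeConv doubles its input width. Illustrative shape check:
+#
+#     x = torch.randn(8, 3, 1024)          # (batch, xyz, points)
+#     f = get_graph_feature(x, k=20)
+#     assert f.shape == (8, 6, 1024, 20)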
+
+
+class PointNet(nn.Module):
+    def __init__(self, args, output_channels=40):
+        super(PointNet, self).__init__()
+        self.args = args
+        self.conv1 = nn.Conv1d(3, 64, kernel_size=1, bias=False)
+        self.conv2 = nn.Conv1d(64, 64, kernel_size=1, bias=False)
+        self.conv3 = nn.Conv1d(64, 64, kernel_size=1, bias=False)
+        self.conv4 = nn.Conv1d(64, 128, kernel_size=1, bias=False)
+        self.conv5 = nn.Conv1d(128, args.emb_dims, kernel_size=1, bias=False)
+        self.bn1 = nn.BatchNorm1d(64)
+        self.bn2 = nn.BatchNorm1d(64)
+        self.bn3 = nn.BatchNorm1d(64)
+        self.bn4 = nn.BatchNorm1d(128)
+        self.bn5 = nn.BatchNorm1d(args.emb_dims)
+        self.linear1 = nn.Linear(args.emb_dims, 512, bias=False)
+        self.bn6 = nn.BatchNorm1d(512)
+        self.dp1 = nn.Dropout()
+        self.linear2 = nn.Linear(512, output_channels)
+
+    def forward(self, x):
+        x = F.relu(self.bn1(self.conv1(x)))
+        x = F.relu(self.bn2(self.conv2(x)))
+        x = F.relu(self.bn3(self.conv3(x)))
+        x = F.relu(self.bn4(self.conv4(x)))
+        x = F.relu(self.bn5(self.conv5(x)))
+        # Squeeze only the pooled dimension so a batch of size 1 survives.
+        x = F.adaptive_max_pool1d(x, 1).squeeze(-1)
+        x = F.relu(self.bn6(self.linear1(x)))
+        x = self.dp1(x)
+        x = self.linear2(x)
+        return x
+
+
+class DGCNN(nn.Module):
+    def __init__(self, args, output_channels=40):
+        super(DGCNN, self).__init__()
+        self.args = args
+        self.k = args.k
+
+        self.bn1 = nn.BatchNorm2d(64)
+        self.bn2 = nn.BatchNorm2d(64)
+        self.bn3 = nn.BatchNorm2d(128)
+        self.bn4 = nn.BatchNorm2d(256)
+        self.bn5 = nn.BatchNorm1d(args.emb_dims)
+
+        self.conv1 = nn.Sequential(nn.Conv2d(6, 64, kernel_size=1, bias=False),
+                                   self.bn1,
+                                   nn.LeakyReLU(negative_slope=0.2))
+        self.conv2 = nn.Sequential(nn.Conv2d(64*2, 64, kernel_size=1, bias=False),
+                                   self.bn2,
+                                   nn.LeakyReLU(negative_slope=0.2))
+        self.conv3 = nn.Sequential(nn.Conv2d(64*2, 128, kernel_size=1, bias=False),
+                                   self.bn3,
+                                   nn.LeakyReLU(negative_slope=0.2))
+        self.conv4 = nn.Sequential(nn.Conv2d(128*2, 256, kernel_size=1, bias=False),
+                                   self.bn4,
+                                   nn.LeakyReLU(negative_slope=0.2))
+        self.conv5 = nn.Sequential(nn.Conv1d(512, args.emb_dims, kernel_size=1, bias=False),
+                                   self.bn5,
+                                   nn.LeakyReLU(negative_slope=0.2))
+        self.linear1 = nn.Linear(args.emb_dims*2, 512, bias=False)
+        self.bn6 = nn.BatchNorm1d(512)
+        self.dp1 = nn.Dropout(p=args.dropout)
+        self.linear2 = nn.Linear(512, 256)
+        self.bn7 = nn.BatchNorm1d(256)
+        self.dp2 = nn.Dropout(p=args.dropout)
+        self.linear3 = nn.Linear(256, output_channels)
+
+    def forward(self, x):
+        batch_size = x.size(0)
+        # Four EdgeConv blocks: rebuild the kNN graph in feature space,
+        # convolve over edges, then max over the k neighbours.
+        x = get_graph_feature(x, k=self.k)
+        x = self.conv1(x)
+        x1 = x.max(dim=-1, keepdim=False)[0]
+
+        x = get_graph_feature(x1, k=self.k)
+        x = self.conv2(x)
+        x2 = x.max(dim=-1, keepdim=False)[0]
+
+        x = get_graph_feature(x2, k=self.k)
+        x = self.conv3(x)
+        x3 = x.max(dim=-1, keepdim=False)[0]
+
+        x = get_graph_feature(x3, k=self.k)
+        x = self.conv4(x)
+        x4 = x.max(dim=-1, keepdim=False)[0]
+
+        # Concatenate the multi-scale features (64+64+128+256 = 512 channels).
+        x = torch.cat((x1, x2, x3, x4), dim=1)
+
+        x = self.conv5(x)
+        x1 = F.adaptive_max_pool1d(x, 1).view(batch_size, -1)
+        x2 = F.adaptive_avg_pool1d(x, 1).view(batch_size, -1)
+        x = torch.cat((x1, x2), 1)
+
+        x = F.leaky_relu(self.bn6(self.linear1(x)), negative_slope=0.2)
+        x = self.dp1(x)
+        x = F.leaky_relu(self.bn7(self.linear2(x)), negative_slope=0.2)
+        x = self.dp2(x)
+        x = self.linear3(x)
+        return x
diff --git a/pytorch/pretrained/model.1024.t7 b/pytorch/pretrained/model.1024.t7
new file mode 100644
index 0000000..4e99f49
Binary files /dev/null and b/pytorch/pretrained/model.1024.t7 differ
diff --git a/pytorch/pretrained/model.2048.t7 b/pytorch/pretrained/model.2048.t7
new file mode 100644
index 0000000..3f615ec
Binary files /dev/null and b/pytorch/pretrained/model.2048.t7 differ
diff --git a/pytorch/util.py b/pytorch/util.py
new file mode 100644
index 0000000..6875176
--- /dev/null
+++ b/pytorch/util.py
@@ -0,0 +1,46 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+@Author: Yue Wang
+@Contact: yuewangx@mit.edu
+@File: util.py
+@Time: 4/5/19 3:47 PM
+"""
+
+
+import torch
+import torch.nn.functional as F
+
+
+def cal_loss(pred, gold, smoothing=True):
+    ''' Calculate cross entropy loss, apply label smoothing if needed. '''
+
+    gold = gold.contiguous().view(-1)
+
+    if smoothing:
+        eps = 0.2
+        n_class = pred.size(1)
+
+        # Smooth the one-hot target: 1-eps on the true class, eps spread
+        # uniformly over the remaining classes.
+        one_hot = torch.zeros_like(pred).scatter(1, gold.view(-1, 1), 1)
+        one_hot = one_hot * (1 - eps) + (1 - one_hot) * eps / (n_class - 1)
+        log_prb = F.log_softmax(pred, dim=1)
+
+        loss = -(one_hot * log_prb).sum(dim=1).mean()
+    else:
+        loss = F.cross_entropy(pred, gold, reduction='mean')
+
+    return loss
+
+
+class IOStream:
+    ''' Log to stdout and to a file at the same time. '''
+
+    def __init__(self, path):
+        self.f = open(path, 'a')
+
+    def cprint(self, text):
+        print(text)
+        self.f.write(text + '\n')
+        self.f.flush()
+
+    def close(self):
+        self.f.close()
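+
+
+# NOTE (editor): minimal usage sketch for cal_loss, assuming raw logits of
+# shape (batch, n_class) and integer class labels (illustrative only):
+#
+#     logits = torch.randn(4, 40)
+#     labels = torch.randint(0, 40, (4,))
+#     loss_smooth = cal_loss(logits, labels)                   # smoothed CE
+#     loss_plain = cal_loss(logits, labels, smoothing=False)   # plain CE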