diff --git a/Classification/cnns/alexnet_model.py b/Classification/cnns/alexnet_model.py
index d627fc0..c4e2006 100644
--- a/Classification/cnns/alexnet_model.py
+++ b/Classification/cnns/alexnet_model.py
@@ -60,7 +60,7 @@ def conv2d_layer(
         else (filters, kernel_size_1, kernel_size_2, input.shape[3])
     )
     weight = flow.get_variable(
-        name + "-weight",
+        name + ".weight",
         shape=weight_shape,
         dtype=input.dtype,
         initializer=weight_initializer,
@@ -71,7 +71,7 @@ def conv2d_layer(
     )
     if use_bias:
         bias = flow.get_variable(
-            name + "-bias",
+            name + ".bias",
             shape=(filters,),
             dtype=input.dtype,
             initializer=bias_initializer,
@@ -92,7 +92,7 @@ def alexnet(images, args, trainable=True):
     data_format = "NHWC" if args.channel_last else "NCHW"
 
     conv1 = conv2d_layer(
-        "conv1",
+        "features.0",
         images,
         filters=64,
         kernel_size=11,
@@ -104,22 +104,24 @@ def alexnet(images, args, trainable=True):
     pool1 = flow.nn.avg_pool2d(conv1, 3, 2, "VALID", data_format, name="pool1")
 
     conv2 = conv2d_layer(
-        "conv2", pool1, filters=192, kernel_size=5, data_format=data_format
+        "features.3", pool1, filters=192, kernel_size=5, data_format=data_format
     )
 
     pool2 = flow.nn.avg_pool2d(conv2, 3, 2, "VALID", data_format, name="pool2")
 
-    conv3 = conv2d_layer("conv3", pool2, filters=384, data_format=data_format)
+    conv3 = conv2d_layer("features.6", pool2, filters=384, data_format=data_format)
 
-    conv4 = conv2d_layer("conv4", conv3, filters=384, data_format=data_format)
+    conv4 = conv2d_layer("features.8", conv3, filters=384, data_format=data_format)
 
-    conv5 = conv2d_layer("conv5", conv4, filters=256, data_format=data_format)
+    conv5 = conv2d_layer("features.10", conv4, filters=256, data_format=data_format)
 
     pool5 = flow.nn.avg_pool2d(conv5, 3, 2, "VALID", data_format, name="pool5")
 
     if len(pool5.shape) > 2:
         pool5 = flow.reshape(pool5, shape=(pool5.shape[0], -1))
-
+    print("###############")
+    print(pool5.shape)
+    print("###############")
     fc1 = flow.layers.dense(
         inputs=pool5,
         units=4096,
@@ -131,13 +133,13 @@ def alexnet(images, args, trainable=True):
         kernel_regularizer=_get_regularizer(),
         bias_regularizer=_get_regularizer(),
         trainable=trainable,
-        name="fc1",
+        name="classifier.0",
     )
 
-    dropout1 = flow.nn.dropout(fc1, rate=0.5)
+    # dropout1 = flow.nn.dropout(fc1, rate=0.5)
 
     fc2 = flow.layers.dense(
-        inputs=dropout1,
+        inputs=fc1,
         units=4096,
         activation=flow.nn.relu,
         use_bias=True,
@@ -146,21 +148,21 @@ def alexnet(images, args, trainable=True):
         kernel_regularizer=_get_regularizer(),
         bias_regularizer=_get_regularizer(),
         trainable=trainable,
-        name="fc2",
+        name="classifier.2",
    )
 
-    dropout2 = flow.nn.dropout(fc2, rate=0.5)
+    # dropout2 = flow.nn.dropout(fc2, rate=0.5)
 
     fc3 = flow.layers.dense(
-        inputs=dropout2,
-        units=1000,
+        inputs=fc2,
+        units=args.num_classes,
         activation=None,
         use_bias=False,
         kernel_initializer=_get_kernel_initializer(),
         kernel_regularizer=_get_regularizer(),
         bias_initializer=False,
         trainable=trainable,
-        name="fc3",
+        name="classifier.4",
     )
 
     return fc3
diff --git a/Classification/cnns/config.py b/Classification/cnns/config.py
index 330686e..488def9 100755
--- a/Classification/cnns/config.py
+++ b/Classification/cnns/config.py
@@ -83,7 +83,7 @@ def str2bool(v):
         "--pad_output",
         type=str2bool,
         nargs="?",
-        const=True,
+        const=False,
         help="Whether to pad the output to number of image channels to 4.",
     )
 
diff --git a/Classification/cnns/of_cnn_train_val.py b/Classification/cnns/of_cnn_train_val.py
index 3d5cbbd..1377348 100755
--- a/Classification/cnns/of_cnn_train_val.py
+++ b/Classification/cnns/of_cnn_train_val.py
@@ -84,8 +84,8 @@ def TrainNet():
     if args.train_data_dir:
         assert os.path.exists(args.train_data_dir)
         print("Loading data from {}".format(args.train_data_dir))
-        (labels, images) = ofrecord_util.load_imagenet_for_training(args)
-
+        # (labels, images) = ofrecord_util.load_imagenet_for_training(args)
+        (labels, images) = ofrecord_util.load_imagenet_for_training_v2(args)
     else:
         print("Loading synthetic data.")
         (labels, images) = ofrecord_util.load_synthetic(args)
diff --git a/Classification/cnns/ofrecord_util.py b/Classification/cnns/ofrecord_util.py
index c6cca07..37d3abe 100755
--- a/Classification/cnns/ofrecord_util.py
+++ b/Classification/cnns/ofrecord_util.py
@@ -106,6 +106,44 @@ def load_imagenet_for_training(args):
     )
     return label, normal
 
+def load_imagenet_for_training_v2(args):
+    total_device_num = args.num_nodes * args.gpu_num_per_node
+    train_batch_size = total_device_num * args.batch_size_per_device
+    output_layout = "NHWC" if args.channel_last else "NCHW"
+
+    color_space = "RGB"
+    ofrecord = flow.data.ofrecord_reader(
+        args.train_data_dir,
+        batch_size=train_batch_size,
+        data_part_num=args.train_data_part_num,
+        part_name_suffix_length=5,
+        shuffle_after_epoch=False,
+    )
+    image = flow.data.OFRecordImageDecoder(ofrecord, "encoded", color_space=color_space)
+    label = flow.data.OFRecordRawDecoder(
+        ofrecord, "class/label", shape=(), dtype=flow.int32
+    )
+
+    rsz = flow.image.Resize(
+        image,
+        resize_side="shorter",
+        keep_aspect_ratio=True,
+        target_size=args.resize_shorter,
+    )
+
+    normal = flow.image.CropMirrorNormalize(
+        rsz[0],
+        color_space=color_space,
+        output_layout=output_layout,
+        crop_h=args.image_size,
+        crop_w=args.image_size,
+        crop_pos_y=0.5,
+        crop_pos_x=0.5,
+        mean=args.rgb_mean,
+        std=args.rgb_std,
+        output_dtype=flow.float,
+    )
+    return label, normal
 
 def load_imagenet_for_validation(args):
     total_device_num = args.num_nodes * args.gpu_num_per_node
diff --git a/Classification/cnns/train_alexnet.sh b/Classification/cnns/train_alexnet.sh
new file mode 100644
index 0000000..cd57308
--- /dev/null
+++ b/Classification/cnns/train_alexnet.sh
@@ -0,0 +1,31 @@
+
+OFRECORD_PATH="ofrecord"
+if [ ! -d "$OFRECORD_PATH" ]; then
+    wget https://oneflow-public.oss-cn-beijing.aliyuncs.com/datasets/imagenette_ofrecord.tar.gz
+    tar zxf imagenette_ofrecord.tar.gz
+fi
+
+MODEL_LOAD_DIR="initial_model_remove_mom"
+CLASSES=10
+
+python3 of_cnn_train_val.py \
+    --train_data_dir=$OFRECORD_PATH/train \
+    --val_data_dir=$OFRECORD_PATH/val \
+    --train_data_part_num=1 \
+    --val_data_part_num=1 \
+    --num_nodes=1 \
+    --gpu_num_per_node=1 \
+    --optimizer="sgd" \
+    --momentum=0.9 \
+    --learning_rate=0.01 \
+    --pad_output=False \
+    --loss_print_every_n_iter=1 \
+    --batch_size_per_device=512 \
+    --val_batch_size_per_device=512 \
+    --num_examples=9469 \
+    --num_val_examples=3925 \
+    --num_epoch=90 \
+    --use_fp16=false \
+    --model="alexnet" \
+    --num_classes=$CLASSES \
+    --model_load_dir=$MODEL_LOAD_DIR
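
Note (editor's addition, not part of the patch): a quick sanity check of the schedule implied by train_alexnet.sh. The constants below are copied from the script; how of_cnn_train_val.py rounds the final partial batch is not shown here, so the iteration counts are approximate.

# Sketch only: global batch size and per-epoch iteration counts for the
# single-node, single-GPU configuration used in train_alexnet.sh.
num_nodes = 1
gpu_num_per_node = 1
batch_size_per_device = 512
num_examples = 9469          # --num_examples (imagenette train split)
num_val_examples = 3925      # --num_val_examples (imagenette val split)
num_epoch = 90

global_batch_size = num_nodes * gpu_num_per_node * batch_size_per_device  # 512
train_iters_per_epoch = num_examples // global_batch_size                 # 18, plus a partial batch
val_iters_per_epoch = num_val_examples // global_batch_size               # 7, plus a partial batch
print(global_batch_size, train_iters_per_epoch, val_iters_per_epoch,
      train_iters_per_epoch * num_epoch)                                  # 512 18 7 1620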