tomato_classification_model(Day 40).py

# -*- coding: utf-8 -*-
"""Tomato_classification_model.ipynb

Automatically generated by Colab.

"""

!pip install captum # install the captum module

import torch
import torch.nn.functional as F
import torchvision.transforms as transforms
import torchvision.models as models

import captum
from captum.attr import IntegratedGradients, Occlusion, LayerGradCam, LayerAttribution
from captum.attr import visualization as viz

import os, sys
import json

import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
from matplotlib.colors import LinearSegmentedColormap

# Mount Google Drive so the dataset folders become reachable from the runtime.
from google.colab import drive
drive.mount('/content/drive')

# Dataset root plus the names of its image and label sub-folders.
root_path = '/content/drive/MyDrive/tomato dataset/tomato dataset/'
image_path, label_path = 'Images', 'labels'

full_path_images, full_path_labels = (
    os.path.join(root_path, sub_dir) for sub_dir in (image_path, label_path)
)

full_path_images

# List each folder and echo its contents.
images = os.listdir(full_path_images)
print(images)

labels = os.listdir(full_path_labels)
print(labels)

len(images)

"""# Data Science Life Cycle


# 1. Problem Statement

Hypothetical Business Situation: Tomato Classification Model
Background
You are working as a data scientist for "FreshHarvest Inc.," a leading agricultural technology company that specializes in providing innovative solutions to improve crop yield and quality. The company has recently partnered with several large tomato farms to help them automate the sorting process of ripe and unripe tomatoes. The goal is to build an accurate and efficient classification model to distinguish between ripe and unripe tomatoes, ensuring that only the best quality produce reaches the market.

Business Need
Tomato sorting is currently done manually, which is time-consuming, labor-intensive, and prone to human error. FreshHarvest Inc. aims to implement an automated system that uses computer vision and machine learning to classify tomatoes based on ripeness. This system will reduce labor costs, increase sorting speed, and improve the overall quality of tomatoes sent to market.


#2. Data Collection

Kaggle - 177 images and labels of mixed ripe and unripe tomatoes.

#3. Data Wrangling

- Remove Corrupted Images

- Ensure label accuracy

- Image Quality


#4. Exploratory Data Analysis

- Distribution of Classes: Check the distribution of ripe and unripe tomatoes to ensure there is no significant class imbalance.

#5. PreProcessing

- Data Augmentation: rotation, flipping, zooming, and shifting to artificially increase the diversity of the dataset

- Normalization: Normalize the pixel values of the images to a range of [0, 1] to improve model training stability.

#6. Build and Train model

- CNN

#7. Test model

- Binary Cross Entropy

#8. Deploy!

- torch.save(model.state_dict(), path)

----------------------------------------------------------------------------

#Technical Steps:

# Step 1: We have to create a dataset -> dataloader


*   It's important to preprocess the data


# Step 2: Visualize the images with their corresponding labels.


# Step 3: Create a model

- 1. Build a model

- 2. Use Pre - trained model


# Step 4: depending on overfitting / underfitting

-Tweak the learnable parameters.

# Step 5: Use Integrated Gradients attribution to explain features.
"""

# Use the first entry returned by os.listdir as the sample image to inspect.
first_file_name = os.listdir(full_path_images)[0]
first_sample = os.path.join(full_path_images, first_file_name)

first_sample

# Load the sample with PIL and render its raw pixel array.
test_img = Image.open(first_sample)
test_img_data = np.asarray(test_img)
plt.imshow(test_img_data)
plt.show()

"""transforms.ToTensor() -> converts image to tensors"""

# Convert the PIL image into a tensor and look at its shape.
transform = transforms.ToTensor()
image_tensor = transform(test_img)
image_tensor.shape  # noted for this sample: 3 channels, 275 height, 400 width

"""transforms.TOPILImage() -> converts back to PIL image."""

import torch
import torchvision
import torchvision.transforms as transforms

# Round trip: turn the tensor back into a PIL image.
to_pil = transforms.ToPILImage()
img_pil = to_pil(image_tensor)
img_pil

import pandas as pd

# File stems (the text before the first dot) of every image, as a one-column frame.
image_stems = [file_name.split('.')[0] for file_name in os.listdir(full_path_images)]
tomato_images_df = pd.DataFrame(image_stems, columns=['images'])

# The same stems for the label files, kept as a plain list.
label_list = [file_name.split('.')[0] for file_name in os.listdir(full_path_labels)]

"""We know that there's no missing unmatching label and images. Meaning - I was worried if riped_tomato_12 didnt exist for image but exist in labels."""

# Rows of tomato_images_df whose stem also appears among the label stems.
tomato_images_df[tomato_images_df['images'].isin(label_list)]

first_sample

# Side-by-side listing of both folders. os.listdir returns entries in
# arbitrary order, so the two columns are not guaranteed to line up row by
# row; the 'labels' column is rebuilt from the image names further down.
df = pd.DataFrame(
    {
        'images': os.listdir(full_path_images),
        'labels': os.listdir(full_path_labels),
    }
)

df.head()

# Keep only the image names, sorted. sort_values returns a new frame, so its
# result must be assigned for the sort to take effect; sorting also makes the
# row order independent of the arbitrary os.listdir order.
image_df = df[['images']].sort_values(by='images').reset_index(drop=True)

# Derive each label file name from its image name. image_df is a fresh frame
# (not a slice of df), so adding a column does not raise pandas'
# SettingWithCopyWarning.
image_df['labels'] = image_df['images'].str.replace('.jpeg', '.txt', regex=False)

df = image_df

df

import numpy as np

# "riped" is itself a substring of "unriped", so the two classes are told
# apart by testing for "unriped" only.
substring = 'unriped'
is_unriped = df['labels'].str.contains(substring, case=False, na=False)
unriped_df = df[is_unriped]
riped_df = df[~is_unriped]

print('The number of samples for unriped tomato: {}'.format(len(unriped_df)))
print('The number of samples for riped tomato: {}'.format(len(riped_df)))

# Binary target: 0 = unriped, 1 = riped. The same case-insensitive mask that
# produced the counts above is reused so the target column always agrees with
# them (a separate case-sensitive match could label 'Unriped_*' files as riped).
df['target'] = np.where(is_unriped, 0, 1)

df.drop(['labels'], axis=1, inplace=True)

file_path = '/content/drive/MyDrive/tomato dataset/tomato dataset/Sorted Dataset/tomato_data.csv'

# Export DataFrame to CSV
df.to_csv(file_path, index=False)

"""Sort them so that their images and labels match

The dataset class defined below does not sort the rows itself; it reads them in the order stored in the CSV file.
"""

df.head()

from sklearn.model_selection import train_test_split

# Inputs are the image file names; the target is the 0/1 column built earlier.
X, y = df['images'], df['target']

# Stratified 80/20 split with a fixed seed.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

for split_part in (X_train, X_val, y_train, y_val):
    print(split_part.shape)

y_train.value_counts()

y_val.value_counts()

# Put names and targets back together, one frame per split.
train = pd.concat((X_train, y_train), axis=1)
val = pd.concat((X_val, y_val), axis=1)

file_path_train = '/content/drive/MyDrive/tomato dataset/tomato dataset/Sorted Dataset/tomato_data_train.csv'

# Export the training split to its own CSV. Writing to file_path here would
# overwrite the full-dataset CSV and leave the train file missing for the
# dataset class that reads file_path_train.
train.to_csv(file_path_train, index=False)

file_path_val = '/content/drive/MyDrive/tomato dataset/tomato dataset/Sorted Dataset/tomato_data_val.csv'

# Export the validation split to its own CSV (same reasoning as above).
val.to_csv(file_path_val, index=False)

import os
from PIL import Image
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

class CustomImageDataset(Dataset):
    """Image-classification dataset indexed by a CSV file.

    Column 0 of the CSV is read as the image file name (joined onto
    ``img_dir``) and column 1 as the integer class label. Each item is an
    ``(image, label)`` pair where ``label`` is a one-hot float tensor of
    length ``num_classes``.
    """

    def __init__(self, csv_file, img_dir, num_classes, transform=None):
        """
        Args:
            csv_file (string): Path to the CSV file with image paths and labels.
            img_dir (string): Directory with all the images.
            num_classes (int): Length of the one-hot label vector.
            transform (callable, optional): Optional transform to be applied on a sample.
        """
        self.data_frame = pd.read_csv(csv_file)
        self.img_dir = img_dir
        self.num_classes = num_classes
        self.transform = transform

    def __len__(self):
        """Return the number of rows in the CSV index."""
        return len(self.data_frame)

    def __getitem__(self, idx):
        """Load row ``idx`` and return ``(image, one_hot_label)``."""
        img_name = self.data_frame.iloc[idx, 0]
        img_path = os.path.join(self.img_dir, img_name)

        # Open inside a context manager so the file handle is released as soon
        # as the pixels are read; convert() returns a new, fully loaded image
        # that stays valid after the file is closed.
        with Image.open(img_path) as raw_image:
            image = raw_image.convert("RGB")

        label = int(self.data_frame.iloc[idx, 1])
        label = torch.nn.functional.one_hot(torch.tensor(label), num_classes=self.num_classes).float()

        if self.transform is not None:
            image = self.transform(image)

        return image, label

# Preprocessing applied to every image: fixed 224x224 size, tensor conversion,
# then per-channel normalization with the mean/std values given below.
transform = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
)

img_dir = full_path_images

# Both splits read from the same image folder and differ only in their CSV index.
train_dataset = CustomImageDataset(
    csv_file=file_path_train, img_dir=img_dir, num_classes=2, transform=transform
)
val_dataset = CustomImageDataset(
    csv_file=file_path_val, img_dir=img_dir, num_classes=2, transform=transform
)

train_dataloader = DataLoader(train_dataset, batch_size=4, shuffle=True, drop_last=False)
val_dataloader = DataLoader(val_dataset, batch_size=4, shuffle=True, drop_last=False)

# Peek at the first batch of each loader to confirm the tensor shapes.
for loader in (train_dataloader, val_dataloader):
    for images, labels in loader:
        print(images.shape)
        print(labels.shape)
        break

"""Vanilla CNN model"""

import torch.nn as nn
class CNN_model(nn.Module):
    """Two-convolution CNN followed by a 64-unit hidden layer and a 2-way output.

    Shape walk-through for a 224x224 RGB input
    (conv:  (in - kernel + 2 * padding) / stride + 1,
     pool:  floor((in - kernel) / stride) + 1):

        conv1 (3x3, stride 1, padding 1) -> (32, 224, 224)
        pool  (3x3, stride 2)            -> (32, 111, 111)
        conv2 (3x3, stride 1, padding 1) -> (32, 111, 111)
        pool  (3x3, stride 2)            -> (32, 55, 55)
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1)
        # The same pooling layer is applied after each convolution.
        self.pool = nn.MaxPool2d(kernel_size=3, stride=2)
        self.conv2 = nn.Conv2d(32, 32, kernel_size=3, stride=1, padding=1)
        # Flattened size of the (32, 55, 55) feature map fed to the classifier.
        self.fc_input_size = 32 * 55 * 55
        self.fc1 = nn.Linear(self.fc_input_size, 64)
        self.fc2 = nn.Linear(64, 2)

    def forward(self, x):
        """Return raw (un-activated) scores of shape (batch, 2)."""
        features = self.pool(F.relu(self.conv1(x)))
        features = self.pool(F.relu(self.conv2(features)))
        flat = features.view(features.size(0), -1)
        hidden = F.relu(self.fc1(flat))
        return self.fc2(hidden)

# Instantiate the untrained network and push one training batch through it to
# confirm the input, label and output shapes.
model = CNN_model()

for images, labels in train_dataloader:
    for tensor in (images, labels):
        print(tensor.shape)
    pred = model(images)
    print(pred.shape)
    break

"""#training loop"""

!pip install torch torchvision matplotlib tensorboard

from torch.utils.tensorboard import SummaryWriter

def matplotlib_imshow(img, one_channel=False,
                      mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)):
    """Show a normalized, channel-first image tensor with matplotlib.

    Args:
        img: Image tensor laid out as (channels, height, width) that was
            normalized as ``(x - mean) / std``.
        one_channel: If True, average over the channel axis and draw the
            result with the "Greys" colormap; otherwise draw as a colour image.
        mean: Per-channel mean used for normalization (a single value is
            broadcast to all channels). Defaults to the values used by the
            ``transforms.Normalize`` step in this file.
        std: Per-channel standard deviation, same conventions as ``mean``.

    Raises:
        ValueError: If ``mean``/``std`` have a length that is neither 1 nor
            the number of channels in ``img`` (for images normalized with
            0.5/0.5, pass ``mean=0.5, std=0.5``).
    """
    # Detach and move to CPU so GPU tensors or tensors tracking gradients can
    # be converted to NumPy.
    img = img.detach().cpu().float()

    mean_t = torch.as_tensor(mean, dtype=img.dtype).reshape(-1, 1, 1)
    std_t = torch.as_tensor(std, dtype=img.dtype).reshape(-1, 1, 1)
    channels = img.size(0)
    if mean_t.size(0) not in (1, channels) or std_t.size(0) not in (1, channels):
        raise ValueError(
            f"mean/std must have 1 or {channels} entries to match the image channels"
        )

    # Undo the normalization, then clamp to the [0, 1] range imshow expects
    # for float images.
    img = (img * std_t + mean_t).clamp(0.0, 1.0)

    if one_channel:
        plt.imshow(img.mean(dim=0).numpy(), cmap="Greys")
    else:
        plt.imshow(np.transpose(img.numpy(), (1, 2, 0)))

# Extract one batch of images (batch_size=4 in the training loader)
dataiter = iter(train_dataloader)
images, labels = next(dataiter)

img_grid = torchvision.utils.make_grid(images)
img_grid.shape

matplotlib_imshow(img_grid, one_channel=False)

matplotlib_imshow(img_grid, one_channel=True)

writer = SummaryWriter('runs/tomato_classification')
writer

# The batch is normalized, so its values fall outside [0, 1]. add_image
# expects float images in [0, 1]; build a min-max rescaled grid for logging
# instead of sending the normalized one. The tag names what is actually shown.
writer.add_image('Four Tomato Images', torchvision.utils.make_grid(images, normalize=True))
writer.flush()

# Commented out IPython magic to ensure Python compatibility.
# %load_ext tensorboard
# %tensorboard --logdir runs

# Number of batches per loader (validation first, then training).
for loader in (val_dataloader, train_dataloader):
    print(len(loader))

# Binary cross-entropy on raw logits, optimized with Adam at lr=0.001.
loss_fn = F.binary_cross_entropy_with_logits
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Shape check on the first batch of each loader.
for loader in (train_dataloader, val_dataloader):
    for j, vdata in enumerate(loader):
        image, label = vdata
        print(image.shape)
        print(label.shape)
        break

len(train_dataloader)

"""1. learning_rate = 0.001 - overfits epoch 1 ~ 5

Since the loss was highly unstable, I decided to decrease learning rate

2. Changed learning_rate = 0.0001 epoch 25
"""

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = CNN_model().to(device)
loss_fn = F.binary_cross_entropy_with_logits
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)

log_every = 10     # validate and log once every `log_every` mini-batches
samples_seen = 0   # running count of training samples processed so far

for epoch in range(25):  # loop over the dataset multiple times
    running_loss = 0.0
    batches_since_log = 0

    for i, data in enumerate(train_dataloader):
        # basic training loop
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()
        output = model(inputs)
        loss = loss_fn(output, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        batches_since_log += 1
        samples_seen += inputs.size(0)

        if i % log_every == 0:
            # Check against the validation set
            running_vloss = 0.0

            # In evaluation mode some model specific operations can be omitted eg. dropout layer
            model.train(False)  # Switching to evaluation mode, eg. turning off regularisation
            with torch.no_grad():
                for j, vdata in enumerate(val_dataloader):
                    vinputs, vlabels = vdata
                    # Move the *validation* batch to the device; reusing the
                    # training tensors here would report training loss as
                    # validation loss.
                    vinputs, vlabels = vinputs.to(device), vlabels.to(device)
                    voutputs = model(vinputs)
                    vloss = loss_fn(voutputs, vlabels)
                    running_vloss += vloss.item()

            model.train(True)  # Switching back to training mode, eg. turning on regularisation

            # Average over the batches actually accumulated since the last
            # log (1 at the start of an epoch, `log_every` afterwards).
            avg_loss = running_loss / batches_since_log
            avg_vloss = running_vloss / len(val_dataloader)
            number_of_samples = samples_seen
            print(f'Epoch {epoch}, Batch {i+1} Numbers of samples observed : {number_of_samples}')
            print(f'training loss {avg_loss}')
            print(f'validation loss {avg_vloss}')

            # Log the running loss averaged per batch
            writer.add_scalars('Training vs. Validation Loss',
                            { 'Training' : avg_loss, 'Validation' : avg_vloss },
                            epoch * len(train_dataloader) + i)

            running_loss = 0.0
            batches_since_log = 0
print('Finished Training')

# Commented out IPython magic to ensure Python compatibility.
# %reload_ext tensorboard
# %tensorboard --logdir runs

# Fetch one training batch and move both tensors to the model's device.
dataiter = iter(train_dataloader)
first_batch = next(dataiter)
images, labels = (tensor.to(device) for tensor in first_batch)

# Hand the model and that batch to TensorBoard's graph logger.
writer.add_graph(model, images)
writer.flush()

# Commented out IPython magic to ensure Python compatibility.
# %reload_ext tensorboard
# %tensorboard --logdir runs

df

"""Prepare recipes to input

1. For the features input, I need my images flattened to shape [sample size, channels * height * width].


2. class_labels - list of true labels

3. label_images = should be in the shape of [N,C,H,W] Sample size, channels, height width

4. tag = string value that represents the embedding.
"""

# Collect every training batch into two parallel Python lists.
images, labels = [], []
for index, (image, label) in enumerate(train_dataloader):
    images.append(image)
    labels.append(label)

len(images)

images[0].shape

# Scratch example: concatenating three (2, 3) tensors along dim 0.
x = torch.randn(2, 3)
x

x_con = torch.cat([x, x, x], dim=0)
x_con.shape

# Flatten everything after the batch dimension of the first batch.
first_batch = images[0]
first_batch.view(first_batch.size(0), -1).shape

all_images, all_labels, all_label_img = [], [], []

# Single pass over the training loader, gathering per batch: the images as
# they come, the same images flattened to one row each, and the class index
# recovered from the one-hot label via argmax.
for inputs, targets in train_dataloader:
    all_label_img.append(inputs)
    all_images.append(inputs.view(inputs.size(0), -1))
    all_labels.append(targets.argmax(dim=1))

stacked_all_labels = [
    class_index.item() for label_batch in all_labels for class_index in label_batch
]
stacked_all_labels

# Concatenate the per-batch pieces along the batch dimension.
stacked_all_images = torch.cat(all_images, dim=0)
stacked_all_labels = torch.cat(all_labels, dim=0)
label_img = torch.cat(all_label_img, dim=0)

label_img.shape

stacked_all_images.shape

stacked_all_labels.shape

# add_embedding is given the labels as a plain list rather than a tensor.
stacked_all_labels = stacked_all_labels.tolist()

# Keep only the first channel of every image for the thumbnails.
label_img = label_img[:, 0:1, :, :]

label_img.shape

writer.add_embedding(
    stacked_all_images,           # features [batch(all stacked), channel * height * width]
    metadata=stacked_all_labels,  # class labels (list)
    label_img=label_img,          # all images stacked up [batch(all stacked), channel, height, width]
    tag='My Embedding',
    global_step=0,
)

writer.flush()
writer.close()

# Commented out IPython magic to ensure Python compatibility.
# %reload_ext tensorboard
# %tensorboard --logdir runs/tomato_classification

"""# Transfer Learning"""

from torchvision.models import resnet50, ResNet50_Weights

"""There's multipel versions of pretrained models for ResNet. V1 has less accuracy (the oldest version) and V2 has the newest version (the new veresion).

There are several reasons why old versions of models and weights are maintained and made available even when newer versions with better performance exist. Here are some key reasons:

1. Backward Compatibility
Existing Workflows: Many organizations and developers have existing workflows, scripts, and models that rely on older versions of the weights. Updating these workflows to use newer versions might require significant changes and testing.
Reproducibility: Scientific research and publications often cite specific versions of models and weights. Keeping older versions ensures that results can be reproduced and validated by others.
2. Performance Trade-offs
Inference Speed: In some cases, newer versions of weights might provide better accuracy but at the cost of increased computational resources or longer inference times. Users might prefer older versions for applications where speed is more critical than accuracy.
Memory Usage: Newer models might require more memory, making them unsuitable for deployment on devices with limited resources.
3. Baseline Comparisons
Benchmarking: Older versions serve as baselines for comparing the performance of new models and weights. This is crucial for understanding the improvements and trade-offs of newer versions.
Algorithm Development: Researchers and developers often need to compare their new algorithms against established baselines to demonstrate improvements.
4. Model Training and Fine-Tuning
Transfer Learning: Some users may prefer to start with older weights for specific transfer learning tasks, depending on the characteristics of their datasets or the specific features learned by the older weights.
Training Stability: Older weights might be preferred in certain scenarios where they have shown to provide more stable training or convergence properties for specific tasks.
5. Historical Context
Legacy Systems: Some legacy systems and applications are built with older versions of models. Changing these systems might not be feasible due to regulatory, technical, or financial constraints.
Documentation and Tutorials: Many educational resources, tutorials, and documentation are built around older versions of models. Maintaining these versions ensures that learners and practitioners can follow along with existing educational material.
"""

# Instantiate the network once with each weight version (V1, then V2).
# The newer version has accuracy of 80.858%
for weight_version in (ResNet50_Weights.IMAGENET1K_V1, ResNet50_Weights.IMAGENET1K_V2):
    resnet50(weights=weight_version)

weights = ResNet50_Weights.DEFAULT
preprocess = weights.transforms()

# Apply it to the input image
#

"""The below transformation will preprocess (normalize the data)."""

weights = ResNet50_Weights.DEFAULT
model = resnet50(weights=weights)
model.eval()

# Names of the top-level sub-modules.
for name, child in model.named_children():
    print(f'{name}')

model.layer4  # Each layer is accessible by its name; the names are listed by the loop above.

# Parameter names and shapes of the first top-level sub-module only.
for name, child in model.named_children():
    print(f'{name}')
    for name_2, params in child.named_parameters():
        print(name_2)
        print(params.shape)
    break

num_features = model.fc.in_features
num_features

# Swap the final layer for a fresh 2-output linear layer.
model.fc = nn.Linear(num_features, 2)
model.fc

for child, child_module in model.named_children():
    print(child)

model.fc

# Freeze every pretrained parameter, then re-enable gradients for the new
# 2-output head so it is the only part that is trained.
for param in model.parameters():
    param.requires_grad = False

for param in model.fc.parameters():
    param.requires_grad = True

loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.fc.parameters(), lr=0.0001)

num_epochs = 10  # Set the number of epochs
model.train()

for epoch in range(num_epochs):
    for batch_idx, (data, target) in enumerate(train_dataloader):
        # Batches from the dataloader were already resized to 224x224 and
        # normalized by the dataset's transform, so they are fed to the model
        # directly. Passing them through the weights' preprocess transform as
        # well would normalize the data a second time.
        optimizer.zero_grad()
        output = model(data)
        # target is a one-hot float tensor; CrossEntropyLoss treats a float
        # target with the same shape as the output as class probabilities.
        loss = loss_fn(output, target)
        loss.backward()
        optimizer.step()

        if batch_idx % 10 == 0:
            print(f'Epoch {epoch+1}/{num_epochs}, Batch {batch_idx+1}/{len(train_dataloader)}, Loss: {loss.item()}')

from sklearn.metrics import accuracy_score

model.eval()

val_dump = []          # predicted class index per validation batch
actual_label_val = []  # true class index per validation batch
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Single pass over the validation set with gradients disabled. Inputs are
# used exactly as in training (no second preprocessing step).
with torch.no_grad():
    for batch_idx, (data, target) in enumerate(val_dataloader):
        output = model(data)
        val_dump.append(output.argmax(dim=1))
        actual_label_val.append(target.argmax(dim=1))

val_concat_dump = torch.cat(val_dump, dim=0)
actual_label_concat_dump = torch.cat(actual_label_val, dim=0)

accuracy = accuracy_score(actual_label_concat_dump.numpy(), val_concat_dump.numpy())
print(f'Validation Accuracy: {accuracy * 100:.2f}%')

val_concat_dump

actual_label_concat_dump

"""## Feature attribution"""

integrated_gradients = IntegratedGradients(model)
integrated_gradients

# Take the first validation batch as the inputs to explain.
for image, label in iter(val_dataloader):
    print(image.shape)
    print(label.shape)
    break

# Captum expects `target` as one class index per example, not a one-hot
# matrix, so the one-hot float labels are reduced with argmax first.
# internal_batch_size caps how many interpolated inputs go through the model
# at once; without it all n_steps * batch_size inputs are evaluated together.
attributions_ig = integrated_gradients.attribute(
    image,
    target=label.argmax(dim=1),
    n_steps=200,
    internal_batch_size=16,
)

attributions_ig

"""#what is quantized machine learning/ quantized weights:

Quantization has couple benefits and concepts:

1. Floating point to integer:
-quantization typically involves converting 32-bit floating point numbers (FP32) to lower precision formats such as 8 bits (INT8).

2. Efficiency improvement:
Memory Footprint: Lower precision numbers require less memory, leading to a reduced memory footprint for the model.
Inference Speed: Integer arithmetic operations are faster and more power-efficient than floating-point operations, resulting in faster inference times and lower power consumption.


3. Types of quantization:

- post: the model is trained in full precision and quantization is applied after training. Small loss of accuracy but simpler.

- pre(quantization aware training): Model is trained with quantization in mind, simulating the effects of quantization during the training process. Preserves more accuracy.

4. Use Cases:

- Good for mobile devices/applications where computational power and battery life are constrained.

Use Cases:

Regular Weights: Preferred for training and tasks requiring high precision and large computational resources.
Quantized Weights: Preferred for deployment and inference on resource-constrained devices where speed and efficiency are prioritized over minimal accuracy loss.


Quantization Process:

Regular Weights: Directly used in the form they are trained.

Quantized Weights: Often require a process called Quantization Aware Training (QAT) or post-training quantization to convert the FP32 weights to INT8 while attempting to minimize the impact on model accuracy.

Example in Context
For instance, in the context of the MobileNetV3

model:
MobileNet_V3_Large_QuantizedWeights.

IMAGENET1K_QNNPACK_V1:


These quantized weights are optimized for inference on CPUs using QNNPACK backend, suitable for mobile and edge devices.

MobileNet_V3_Large_Weights.


IMAGENET1K_V2:
These are regular FP32 weights, providing slightly better accuracy and suitable for environments where computational resources are less constrained.

## Important Note about quantization

PyTorch supports INT8 quantization; compared to regular FP32 (float) models, this allows a 4x reduction in model size and a 4x reduction in memory bandwidth requirements.

#How quantization works:

- Symmetric quantization:

The range of the floating-point numbers is symmetrically distributed around zero.

1. Scaling factor:

s = max(abs(min), abs(max)) / (2^(b-1) - 1)

2. zero point = z = 0

3. quantization: q = round(x/s)

4. dequantization:

x = q * s

- Asymmetric quantization:

In asymmetric quantization, the range of the floating-point numbers is not necessarily centered around zero. This approach uses a zero point to handle cases where the distribution of values does not include zero or is not symmetric around zero.

1. Scaling factor:

s = (max - min) / (2^b - 1)


2. Zero Point: z = round(-min/s)


3. quantization:
 q = round(x/s) + z

4. dequantization:
x = (q-z)*s

---------- General equation underneath: ----------

The linear quantization:

q = round((x - min) / s)

When we linearly dequantize:

x = q * s + min

s - Scaling value

This is the most important parameter.

s = (max-min)/(2^b - 1)

if you want 8 bit quantization, you put 8 in the b.

min = -0.8, max = 0.6

s = (0.6-(-0.8)) / 255
  = 1.4/255 = 0.0055

Zero Point (z):

z is the quantized integer value that represents the real number zero.
For symmetric quantization, the zero point is usually zero.
For asymmetric quantization, it is z = round(-min/s).

#Quantization-Aware Training (Pre quantization)
During training, quantization-aware training (QAT) simulates quantization effects in the forward and backward passes to improve the robustness of the model when weights and activations are quantized during inference.

Fake Quantization:
In QAT, "fake" quantization is applied where values are quantized and dequantized during training:

quantized x = s * round(x/s)

This ensures that the model learns weights that are robust to quantization.

Gradient Propagation:
During backpropagation, gradients are calculated based on the fake quantized values, allowing the model to adjust the weights to minimize the quantization error.

#Post Quantization

Post-training quantization (PTQ) involves training the model with full precision and then quantizing it afterward. This can be done in several ways:

Static Quantization:
Calibrate the model using a representative dataset to determine the appropriate scale and zero points.

Dynamic Quantization:
Quantize weights statically but dynamically quantize activations during inference.
"""

from torchvision.models import resnet50, ResNet50_Weights

# Every accepted form of the `weights` argument, tried in this order:
#   IMAGENET1K_V1    old weights with accuracy 76.130%
#   IMAGENET1K_V2    new weights with accuracy 80.858%
#   DEFAULT          best available weights (currently alias for IMAGENET1K_V2);
#                    note that these weights may change across versions
#   "IMAGENET1K_V2"  strings are also supported
#   None             no weights - random initialization
for weights_option in (
    ResNet50_Weights.IMAGENET1K_V1,
    ResNet50_Weights.IMAGENET1K_V2,
    ResNet50_Weights.DEFAULT,
    "IMAGENET1K_V2",
    None,
):
    resnet50(weights=weights_option)

import torch

# Load the same architecture through torch.hub.
model = torch.hub.load('pytorch/vision', 'resnet50', weights='IMAGENET1K_V2')
model

import torch

# Ask torch.hub for the weights enum that belongs to resnet50 and list its members.
weight_enum = torch.hub.load('pytorch/vision', 'get_model_weights', name='resnet50')
weight_enum

print(list(weight_enum))

"""Classification
The following classification models are available, with or without pre-trained weights:

AlexNet
ConvNeXt
DenseNet
EfficientNet
EfficientNetV2
GoogLeNet
Inception V3
MaxVit
MNASNet
MobileNet V2
MobileNet V3
RegNet
ResNet
ResNeXt
ShuffleNet V2
SqueezeNet
SwinTransformer
VGG
VisionTransformer
Wide ResNet
"""

import torch
from torchvision.io import read_image
from torchvision.models.quantization import resnet50
from torchvision.models.quantization import ResNet50_QuantizedWeights

img = read_image(first_sample)
weights = ResNet50_QuantizedWeights.DEFAULT
# Pass quantize=True to use quantized weights. `weights` is passed by keyword,
# matching the other quantized resnet50 call in this file; the torchvision
# model builders take it as a keyword argument and warn on positional use.
model = resnet50(weights=weights, quantize=True)
preprocess = weights.transforms()

img.shape

img.dtype

"""## torch.qint8 -> torch.quint8"""

#torch.qint8 is a signed 8-bit quantizied integer
#Ranges from [-128, 127]

#torch.quint8 is a unsigned 8-bit quantized integer.
#ranges from [0, 255]

# Inspect the dtype of one stored weight tensor of the quantized model.
state_dict = model.state_dict()
state_dict['layer1.0.conv1.weight'].dtype

"""Since our model input is qint8, we need to quantize into qint8"""

# Run the sample image through the weights' inference transform first;
# everything below reads `preprocessed_img`, which was otherwise never defined.
preprocessed_img = preprocess(img)

# Unsigned 8-bit quantization, then back to float.
# NOTE(review): with scale=1.0 and zero_point=0 every value is rounded to an
# integer and quint8 cannot represent negatives - confirm these parameters are
# only meant to illustrate the dtypes.
quint8_tensor = torch.quantize_per_tensor(preprocessed_img, scale=1.0, zero_point=0, dtype=torch.quint8)
print('original image', img.dtype)
print('original image', img.shape)
print("preprocessed image", preprocessed_img.dtype)
print("preprocessed image", preprocessed_img.shape)
print("Quantized quint8 tensor:", quint8_tensor.dtype)
print("Quantized quint8 tensor:", quint8_tensor.shape)
dequantizied_tensor = quint8_tensor.dequantize()
print("Dequantized quint8 tensor:", dequantizied_tensor.dtype)
print("Dequantized quint8 tensor:", dequantizied_tensor.shape)

# Same experiment with the signed 8-bit dtype.
qint8_tensor = torch.quantize_per_tensor(preprocessed_img, scale=1.0, zero_point=0, dtype=torch.qint8)
print('original image', img.dtype)
print('original image', img.shape)
print("preprocessed image", preprocessed_img.dtype)
print("preprocessed image", preprocessed_img.shape)
print("Quantized qint8 tensor:", qint8_tensor.dtype)
print("Quantized qint8 tensor:", qint8_tensor.shape)
dequantizied_tensor = qint8_tensor.dequantize()
print("Dequantized qint8 tensor:", dequantizied_tensor.dtype)
print("Dequantized qint8 tensor:", dequantizied_tensor.shape)

# dtype of the first registered parameter.
for name, param in model.named_parameters():
    print(param.dtype)
    break

# Names of the top-level sub-modules.
for child, child_module in model.named_children():
    print(child)

input_features = model.fc.in_features
input_features

# Replace the final layer with a float 2-output linear layer.
model.fc = nn.Linear(input_features, 2)

model.fc

# The attribute autograd reads is `requires_grad`; assigning to a misspelled
# name such as `required_grad` only creates an unused attribute and freezes
# nothing.
for param in model.parameters():
    param.requires_grad = False

for param in model.fc.parameters():
    param.requires_grad = True

import torch

# define a floating point model
class M(torch.nn.Module):
    """Minimal float model: one 4 -> 4 linear layer."""

    def __init__(self):
        super().__init__()
        self.fc = torch.nn.Linear(in_features=4, out_features=4)

    def forward(self, x):
        """Apply the linear layer to ``x`` and return the result."""
        return self.fc(x)

model_fp32 = M()

# Layer types to dynamically quantize, and the target dtype for their weights.
dynamic_layer_types = {torch.nn.Linear}
model_int8 = torch.ao.quantization.quantize_dynamic(
    model_fp32,           # the original model
    dynamic_layer_types,
    dtype=torch.qint8,
)

# run the model
input_fp32 = torch.randn(4, 4, 4, 4)
res = model_int8(input_fp32)

model_int8.fc.weight().dtype

# Probe cell: set up a loss and an optimizer over the replaced `fc` layer, then
# look at the dtype of the batches coming out of the training dataloader.
# NOTE(review): as written, each epoch prints the dtype of its first batch and
# immediately breaks out of the inner loop, so no optimization step is ever
# executed - confirm whether this was left in for debugging.
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.fc.parameters(), lr=0.0001)

num_epochs = 10  # Set the number of epochs
model.train()

for epoch in range(num_epochs):
    for batch_idx, (data, target) in enumerate(train_dataloader):
        #the img_transformed is dequantized - float32
        print(data.dtype)
        # Unconditional break: everything below in this loop body is unreachable.
        break

        # NOTE(review): `img_quantized` is not assigned anywhere in the visible
        # file; this line would raise NameError if the break above were removed.
        optimizer.zero_grad()
        output = model(img_quantized) #the model is quantized - int8
        print(output.dtype)

        # Stop early if the model output is not float32.
        if output.dtype != torch.float32:
          print(f'{output.dtype} is not float')
          break

        loss = loss_fn(output, target)
        loss.backward()
        optimizer.step()

        if batch_idx % 10 == 0:
            print(f'Epoch {epoch+1}/{num_epochs}, Batch {batch_idx+1}/{len(train_dataloader)}, Loss: {loss.item()}')

from torchvision.io import read_image
from torchvision.models.quantization import resnet50, ResNet50_QuantizedWeights

img = read_image(first_sample)

# Step 1: build the quantized model from the default quantized weights and
# switch it to evaluation mode.
weights = ResNet50_QuantizedWeights.DEFAULT
model = resnet50(weights=weights, quantize=True)
model.eval()

# Step 2: the inference transforms that ship with those weights.
preprocess = weights.transforms()

# Step 3: preprocess the image and add a leading batch dimension.
batch = preprocess(img).unsqueeze(0)

# Step 4: run the model and report the highest-scoring category.
logits = model(batch).squeeze(0)
prediction = logits.softmax(0)
class_id = prediction.argmax().item()
score = prediction[class_id].item()
category_name = weights.meta["categories"][class_id]
print(f"{category_name}: {100 * score}%")

from torchvision.io import read_image
from torchvision.models.quantization import GoogLeNet_QuantizedWeights

weights = GoogLeNet_QuantizedWeights.DEFAULT

"""Pre-training Quantization
Pre-training quantization is the process of training a neural network directly with quantized weights and activations from the beginning. This approach is also known as Quantization-Aware Training (QAT).


Post-training Quantization
Post-training quantization is the process of converting a fully trained model (using full precision weights) to a quantized version after the training has completed. This is also known as Post-Training Quantization (PTQ).

Pre-training Quantization (QAT) involves training a model with quantization effects simulated during training, allowing the model to learn and adjust for quantization-induced errors, often resulting in higher accuracy for the quantized model.
Post-training Quantization (PTQ) involves converting a fully trained model to a quantized version, offering simplicity and flexibility at the potential cost of a slight drop in accuracy, which can be mitigated using calibration techniques.

#Semantic Segmentation

- Label each pixel in the image - making the model capable to identify various objects inside the image.
"""

from torchvision.io.image import read_image
from torchvision.models.segmentation import fcn_resnet50, FCN_ResNet50_Weights
from torchvision.transforms.functional import to_pil_image

img = read_image(first_sample)

img.shape

# Step 1: segmentation model with its default weights, in evaluation mode.
weights = FCN_ResNet50_Weights.DEFAULT
model = fcn_resnet50(weights=weights)
model.eval()

# Step 2: Initialize the inference transforms
preprocess = weights.transforms()

# Step 3: Apply inference preprocessing transforms
batch = preprocess(img).unsqueeze(0)
batch.shape

# Forward pass without gradient tracking; the "out" entry holds the scores.
with torch.no_grad():
    prediction = model(batch)["out"]

# Softmax over dim 1, then pick the map of the "dog" category for image 0.
normalized_masks = prediction.softmax(dim=1)
categories = weights.meta["categories"]
class_to_idx = dict(zip(categories, range(len(categories))))
mask = normalized_masks[0, class_to_idx["dog"]]
to_pil_image(mask).show()

normalized_masks.shape

class_to_idx['dog']

normalized_masks[0, 12].shape

weights.meta["categories"]

# Min-max rescale the mask before showing it again.
mask_min, mask_max = mask.min(), mask.max()
mask = (mask - mask_min) / (mask_max - mask_min)

to_pil_image(mask).show()


"""Manipulating the pretrained model for our use case

1. Load the pretrained model

2. Modify the output layer
We usually just have a linear layer in the end. and don't touch the other layers weights

3. Freezing Layers
We only need to add nn.Linear(in_features, desired number of output features).

4. Feature Extraction
this extracts features from intermediate layers of the pretrained model, which we can use as input to another model.

get_the_weights = model.features(input_data)

5. Fine-tuning


"""

import torch
import torch.nn as nn
import torchvision.models as models

# Load ImageNet-pretrained ResNet-18 through the `weights` API used elsewhere
# in this file (`pretrained=True` is the deprecated spelling of the same weights).
resnet18 = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)

# Freeze the whole backbone.
for param in resnet18.parameters():
    param.requires_grad = False

resnet18.layer4.parameters()

num_ftrs = resnet18.fc.in_features
num_ftrs

# New 2-class head; its freshly created parameters are trainable by default.
resnet18.fc = nn.Linear(num_ftrs, 2)
resnet18

# Random stand-in batch. Labels must lie in [0, 2) to match the 2-output head;
# drawing them from a wider range makes CrossEntropyLoss fail with a
# target-out-of-bounds error.
inputs = torch.randn(5, 3, 224, 224)
labels = torch.randint(0, 2, (5,))
labels

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(resnet18.fc.parameters(), lr=0.001, momentum=0.9)

# A few optimization steps on the same batch, printing the outputs each time.
for epoch in range(5):
    optimizer.zero_grad()
    outputs = resnet18(inputs)
    loss = criterion(outputs, labels)
    loss.backward()
    optimizer.step()
    print(outputs)