Emotion detection using a Bi-directional LSTM and a Bi-directional GRU. The model predicts 7 types of emotion from text: fear, anger, shame, sadness, joy, disgust, and guilt.

BONDHU-BOT/Emotion-Detection-using-Deep-Learning

Contextual Emotion Detection

1. Loading Data

import numpy as np
import pandas as pd
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem.lancaster import LancasterStemmer
import nltk
import re
from sklearn.preprocessing import OneHotEncoder
import matplotlib.pyplot as plt
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.utils import to_categorical
from keras.models import Sequential, load_model
from keras.layers import Dense, GRU, LSTM, Bidirectional, Embedding, Dropout
from keras.callbacks import ModelCheckpoint
from livelossplot.tf_keras import PlotLossesCallback
from livelossplot import PlotLossesKeras

Dataset link:

def load_dataset(filename):
  df = pd.read_csv(filename)
  label = df["label"]
  unique_label = list(set(label))
  sentences = list(df["text"])
  
  return (df, label, unique_label, sentences)
df, label, unique_label, sentences = load_dataset('iseardataset.csv')
print(unique_label)
['fear', 'anger', 'shame', 'sadness', 'joy', 'disgust', 'guilt']
print(df.head(10))
     label                                               text Unnamed: 2
0      joy  On days when I feel close to my partner and ot...        NaN
1     fear  Every time I imagine that someone I love or I ...        NaN
2    anger  When I had been obviously unjustly treated and...        NaN
3  sadness  When I think about the short time that we live...        NaN
4  disgust  At a gathering I found myself involuntarily si...        NaN
5    shame  When I realized that I was directing the feeli...        NaN
6    guilt  I feel guilty when when I realize that I consi...        NaN
7      joy  After my girlfriend had taken her exam we went...        NaN
8     fear  When, for the first time I realized the meanin...        NaN
9    anger  When a car is overtaking another and I am forc...        NaN
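The head above shows a trailing "Unnamed: 2" column that appears to be empty. It is not used anywhere below, so it can be dropped right after loading; a minimal sketch (the column name is taken from the printout above):

# drop the empty trailing column that the CSV carries along
df = df.drop(columns=["Unnamed: 2"], errors="ignore")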
import seaborn as sns
import tkinter
import matplotlib
matplotlib.use('agg')
import matplotlib.pyplot as plt
%matplotlib inline
sns.countplot(x="label", data=df)
<AxesSubplot:xlabel='label', ylabel='count'>

(figure: countplot of the number of samples per emotion label)

print(sentences[:5])
['On days when I feel close to my partner and other friends.   \nWhen I feel at peace with myself and also experience a close  \ncontact with people whom I regard greatly.', 'Every time I imagine that someone I love or I could contact a  \nserious illness, even death.', 'When I had been obviously unjustly treated and had no possibility  \nof elucidating this.', 'When I think about the short time that we live and relate it to  \nthe periods of my life when I think that I did not use this  \nshort time.', 'At a gathering I found myself involuntarily sitting next to two  \npeople who expressed opinions that I considered very low and  \ndiscriminating.']
nltk.download("stopwords")
nltk.download("punkt")
[nltk_data] Downloading package stopwords to
[nltk_data]     /home/shiningflash/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     /home/shiningflash/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
True

2. Data Cleaning

# define stemmer (note: not applied inside cleaning() below)
stemmer = LancasterStemmer()
def cleaning(sentences):
  words = []
  for s in sentences:
    clean = re.sub(r'[^ a-z A-Z 0-9]', " ", s)
    w = word_tokenize(clean)
    words.append([i.lower() for i in w])
    
  return words 
cleaned_words = cleaning(sentences)
print(len(cleaned_words))
print(cleaned_words[:2])  
7516
[['on', 'days', 'when', 'i', 'feel', 'close', 'to', 'my', 'partner', 'and', 'other', 'friends', 'when', 'i', 'feel', 'at', 'peace', 'with', 'myself', 'and', 'also', 'experience', 'a', 'close', 'contact', 'with', 'people', 'whom', 'i', 'regard', 'greatly'], ['every', 'time', 'i', 'imagine', 'that', 'someone', 'i', 'love', 'or', 'i', 'could', 'contact', 'a', 'serious', 'illness', 'even', 'death']]
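Note that the stopword list was downloaded and a LancasterStemmer was created above, but cleaning() applies neither. If stopword removal and stemming are wanted, a drop-in variant of the cleaning step could look like the sketch below; whether it actually helps accuracy here is untested, and the results reported later use the original cleaning().

stop_words = set(stopwords.words("english"))

def cleaning_with_stemming(sentences):
  # like cleaning(), but also drops English stopwords and stems each token
  words = []
  for s in sentences:
    clean = re.sub(r'[^ a-z A-Z 0-9]', " ", s)
    w = word_tokenize(clean)
    words.append([stemmer.stem(i.lower()) for i in w if i.lower() not in stop_words])
  return words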

3. Texts Tokenization

def create_tokenizer(words, filters = '!"#$%&()*+,-./:;<=>?@[\]^_`{|}~'):
  token = Tokenizer(filters = filters)
  token.fit_on_texts(words)
  return token
def max_length(words):
  return(len(max(words, key = len)))
word_tokenizer = create_tokenizer(cleaned_words)
vocab_size = len(word_tokenizer.word_index) + 1
max_length = max_length(cleaned_words)

print("Vocab Size = %d and Maximum length = %d" % (vocab_size, max_length))
Vocab Size = 8989 and Maximum length = 179
def encoding_doc(token, words):
  return(token.texts_to_sequences(words))
encoded_doc = encoding_doc(word_tokenizer, cleaned_words)
def padding_doc(encoded_doc, max_length):
  return(pad_sequences(encoded_doc, maxlen = max_length, padding = "post"))
padded_doc = padding_doc(encoded_doc, max_length)
print("Shape of padded docs = ",padded_doc.shape)
Shape of padded docs =  (7516, 179)
#tokenizer with filter changed
output_tokenizer = create_tokenizer(unique_label, filters = '!"#$%&()*+,-/:;<=>?@[\]^`{|}~')
output_tokenizer.word_index
{'fear': 1,
 'anger': 2,
 'shame': 3,
 'sadness': 4,
 'joy': 5,
 'disgust': 6,
 'guilt': 7}
encoded_output = encoding_doc(output_tokenizer, label)
encoded_output = np.array(encoded_output).reshape(len(encoded_output), 1)
encoded_output.shape
(7516, 1)
def one_hot(encode):
  o = OneHotEncoder(sparse = False)
  return(o.fit_transform(encode))
output_one_hot = one_hot(encoded_output)
output_one_hot.shape
(7516, 7)
from sklearn.model_selection import train_test_split
train_X, val_X, train_Y, val_Y = train_test_split(padded_doc, output_one_hot, shuffle = True, test_size = 0.2)
print("Shape of train_X = %s and train_Y = %s" % (train_X.shape, train_Y.shape))
print("Shape of val_X = %s and val_Y = %s" % (val_X.shape, val_Y.shape))
Shape of train_X = (6012, 179) and train_Y = (6012, 7)
Shape of val_X = (1504, 179) and val_Y = (1504, 7)
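The fitted word_tokenizer and the computed max_length are needed again at prediction time (section 6). If inference runs in a separate session from training, they have to be persisted alongside the saved model; a small sketch using pickle (file names are arbitrary choices):

import pickle

# save the fitted tokenizer and the padding length next to the model files
with open("word_tokenizer.pkl", "wb") as f:
  pickle.dump(word_tokenizer, f)
with open("max_length.pkl", "wb") as f:
  pickle.dump(max_length, f)

# in the inference session, load them back before calling predictions():
# with open("word_tokenizer.pkl", "rb") as f:
#   word_tokenizer = pickle.load(f)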

4. Bidirectional GRU

def create_model(vocab_size, max_length):
  model = Sequential()
  model.add(Embedding(vocab_size, 128, input_length = max_length, trainable = False))
  model.add(Bidirectional(GRU(128)))
  model.add(Dense(32, activation = "relu"))
  model.add(Dropout(0.5))
  model.add(Dense(7, activation = "softmax"))
  
  return model
model = create_model(vocab_size, max_length)

model.compile(loss = "categorical_crossentropy", optimizer = "adam", metrics = ["accuracy"])
model.summary()
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
embedding (Embedding)        (None, 179, 128)          1150592   
_________________________________________________________________
bidirectional (Bidirectional (None, 256)               198144    
_________________________________________________________________
dense (Dense)                (None, 32)                8224      
_________________________________________________________________
dropout (Dropout)            (None, 32)                0         
_________________________________________________________________
dense_1 (Dense)              (None, 7)                 231       
=================================================================
Total params: 1,357,191
Trainable params: 206,599
Non-trainable params: 1,150,592
_________________________________________________________________
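The Embedding layer above is built with trainable = False but no pretrained weights, so it keeps its random initialization for the whole run (the summary lists its 1,150,592 parameters as non-trainable). If pretrained vectors were intended, they would be passed in through the weights argument; a hedged sketch, assuming a precomputed embedding_matrix of shape (vocab_size, 128) built from, e.g., GloVe (not part of this repo):

def create_model_pretrained(vocab_size, max_length, embedding_matrix):
  # same architecture, but the embedding rows come from embedding_matrix
  model = Sequential()
  model.add(Embedding(vocab_size, 128,
                      weights = [embedding_matrix],
                      input_length = max_length,
                      trainable = False))
  model.add(Bidirectional(GRU(128)))
  model.add(Dense(32, activation = "relu"))
  model.add(Dropout(0.5))
  model.add(Dense(7, activation = "softmax"))
  return model

Alternatively, simply setting trainable = True in create_model() lets the randomly initialized vectors be learned along with the rest of the network.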
filename = 'model.h5'
checkpoint = ModelCheckpoint(filename,
                             monitor='val_loss',
                             verbose=1,
                             save_best_only=True,
                             mode='min')
hist = model.fit(train_X, train_Y,
                 epochs = 100,
                 batch_size = 32,
                 validation_data = (val_X, val_Y),
                 callbacks = [PlotLossesKeras(), checkpoint])

(figure: livelossplot accuracy and loss curves for the Bidirectional GRU)

accuracy
	training         	 (min:    0.143, max:    0.936, cur:    0.936)
	validation       	 (min:    0.186, max:    0.449, cur:    0.427)
Loss
	training         	 (min:    0.173, max:    1.947, cur:    0.173)
	validation       	 (min:    1.556, max:    6.043, cur:    6.043)

Epoch 00100: val_loss did not improve from 1.55581
188/188 [==============================] - 28s 147ms/step - loss: 0.1726 - accuracy: 0.9356 - val_loss: 6.0430 - val_accuracy: 0.4269
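The GRU run overfits heavily: training accuracy ends at 0.936 while validation loss climbs from its best of 1.556 to 6.043. ModelCheckpoint already keeps the best weights on disk, so one common refinement is to also stop training once validation loss stops improving; a minimal sketch (the patience value is an arbitrary choice):

from keras.callbacks import EarlyStopping

early_stop = EarlyStopping(monitor = 'val_loss',
                           patience = 5,
                           restore_best_weights = True,
                           verbose = 1)

# then pass it along with the other callbacks:
# hist = model.fit(train_X, train_Y, ..., callbacks = [PlotLossesKeras(), checkpoint, early_stop])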

5. Bidirectional LSTM

def create_model(vocab_size, max_length):
  model = Sequential()
  model.add(Embedding(vocab_size, 128, input_length = max_length, trainable = False))
  model.add(Bidirectional(LSTM(128)))
  model.add(Dense(32, activation = "relu"))
  model.add(Dropout(0.5))
  model.add(Dense(7, activation = "softmax"))
  
  return model

model_lstm = create_model(vocab_size, max_length)

model_lstm.compile(loss = "categorical_crossentropy", optimizer = "adam", metrics = ["accuracy"])
model_lstm.summary()
Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
embedding_1 (Embedding)      (None, 179, 128)          1150592   
_________________________________________________________________
bidirectional_1 (Bidirection (None, 256)               263168    
_________________________________________________________________
dense_2 (Dense)              (None, 32)                8224      
_________________________________________________________________
dropout_1 (Dropout)          (None, 32)                0         
_________________________________________________________________
dense_3 (Dense)              (None, 7)                 231       
=================================================================
Total params: 1,422,215
Trainable params: 271,623
Non-trainable params: 1,150,592
_________________________________________________________________
filename = 'model_lstm.h5'
checkpoint = ModelCheckpoint(filename,
                             monitor='val_loss',
                             verbose=1,
                             save_best_only=True,
                             mode='min')

hist = model_lstm.fit(train_X, train_Y,
                 epochs = 100,
                 batch_size = 32,
                 validation_data = (val_X, val_Y),
                 callbacks = [PlotLossesKeras(), checkpoint])

(figure: livelossplot accuracy and loss curves for the Bidirectional LSTM)

accuracy
	training         	 (min:    0.143, max:    0.875, cur:    0.867)
	validation       	 (min:    0.158, max:    0.437, cur:    0.430)
Loss
	training         	 (min:    0.350, max:    1.947, cur:    0.379)
	validation       	 (min:    1.602, max:    3.759, cur:    3.562)

Epoch 00100: val_loss did not improve from 1.60191
188/188 [==============================] - 46s 247ms/step - loss: 0.3785 - accuracy: 0.8674 - val_loss: 3.5621 - val_accuracy: 0.4295
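As with the GRU, the final-epoch numbers above come from an overfit model; the best validation loss (1.602) was reached much earlier, and that is the state model_lstm.h5 holds, since the checkpoint saves only on improvement. To report figures for the saved model rather than the last epoch, the checkpoint can be reloaded and evaluated:

# evaluate the best saved checkpoint on the validation split
best_lstm = load_model("model_lstm.h5")
val_loss, val_acc = best_lstm.evaluate(val_X, val_Y, verbose = 0)
print("best checkpoint: val_loss = %.4f, val_accuracy = %.4f" % (val_loss, val_acc))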

6. Real-time Prediction

model = load_model("model_lstm.h5")
def predictions(text):
  # clean and tokenize the input the same way the training data was prepared
  clean = re.sub(r'[^ a-z A-Z 0-9]', " ", text)
  test_word = word_tokenize(clean)
  test_word = [w.lower() for w in test_word]
  test_ls = word_tokenizer.texts_to_sequences(test_word)

  # drop words that are not in the training vocabulary
  if [] in test_ls:
    test_ls = list(filter(None, test_ls))

  # pad to the training sequence length and run the model
  test_ls = np.array(test_ls).reshape(1, len(test_ls))
  x = padding_doc(test_ls, max_length)

  pred = model.predict(x)

  return pred
def get_final_output(pred, classes):
  predictions = pred[0]
  classes = np.array(classes)
  ids = np.argsort(-predictions)
  classes = classes[ids]
  predictions = -np.sort(-predictions)
 
  for i in range(pred.shape[1]):
    print("%s has confidence = %s" % (classes[i], (predictions[i])))
  
  return classes[0]
def get_emotion(text):
    pred = predictions(text)
    result = get_final_output(pred, unique_label)
    print('\nans: {}\n'.format(result))
get_emotion("I did not help out enough at my thesis team.")
guilt has confidence = 0.49738222
shame has confidence = 0.36354083
anger has confidence = 0.05595107
sadness has confidence = 0.037771154
fear has confidence = 0.022115987
disgust has confidence = 0.015596252
joy has confidence = 0.0076424023

ans: guilt
get_emotion("When someone stole my bike.")
anger has confidence = 0.21699908
fear has confidence = 0.16840756
guilt has confidence = 0.16005495
sadness has confidence = 0.15744306
shame has confidence = 0.14280295
disgust has confidence = 0.11216525
joy has confidence = 0.042127196

ans: anger
get_emotion("When my girlfriend left me and tell me that I am not fit for her.")
anger has confidence = 0.40398777
sadness has confidence = 0.19539253
disgust has confidence = 0.126978
guilt has confidence = 0.090000965
joy has confidence = 0.08952445
shame has confidence = 0.070161685
fear has confidence = 0.023954567

ans: anger
get_emotion("During the Christmas holidays, I met some of my old friends.")
joy has confidence = 0.3547267
disgust has confidence = 0.2273542
anger has confidence = 0.15483476
shame has confidence = 0.105804436
sadness has confidence = 0.066970326
guilt has confidence = 0.060981136
fear has confidence = 0.029328424

ans: joy
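get_emotion() prints the ranked confidences but returns nothing, which makes it awkward to call from other code (e.g. a chatbot pipeline). A small illustrative wrapper that returns the top label and its confidence, assuming the same class ordering used in get_final_output (column i of the model output corresponds to unique_label[i]):

def get_emotion_label(text):
  # top-1 label and its confidence for a single input string
  pred = predictions(text)[0]
  best = int(np.argmax(pred))
  return unique_label[best], float(pred[best])

print(get_emotion_label("When someone stole my bike."))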
