# This Python 3 environment comes with many helpful analytics
# libraries installed
# It is defined by the kaggle/python Docker image:
# https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
# Input data files are available in the read-only "../input/"
# directory
# For example, running this (by clicking run or pressing Shift+Enter)
# will list all files under the input directory
import os
# Walk the Kaggle input directory tree and print the full path of every file.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))
# You can write up to 20GB to the current directory (/kaggle/working/)
# that gets preserved as output when you create a version using "Save &
# Run All"
# You can also write temporary files to /kaggle/temp/, but they won't
# be saved outside of the current session
/kaggle/input/emotions/text.csv
import tensorflow as tf
# List the GPU devices TensorFlow can see.
print("TF GPUs:", tf.config.list_physical_devices('GPU'))
external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to
register cuFFT factory: Attempting to register factory for plugin
cuFFT when one has already been registered
WARNING: All log messages before absl::InitializeLog() is called are
written to STDERR
E0000 00:00:- cuda_dnn.cc:8310] Unable to
register cuDNN factory: Attempting to register factory for plugin
cuDNN when one has already been registered
E0000 00:00:- cuda_blas.cc:1418] Unable to
register cuBLAS factory: Attempting to register factory for plugin
cuBLAS when one has already been registered
TF GPUs: [PhysicalDevice(name='/physical_device:GPU:0',
device_type='GPU')]
import torch
# Report whether PyTorch can use CUDA and, if so, the name of device 0.
if torch.cuda.is_available():
    print("CUDA is available ✅")
    print("Device Name:", torch.cuda.get_device_name(0))
else:
    print("CUDA is NOT available ❌")
CUDA is available ✅
Device Name: Tesla P100-PCIE-16GB
# Load the emotions CSV into a DataFrame and print its column/dtype summary.
df = pd.read_csv("/kaggle/input/emotions/text.csv")
df.info()
RangeIndex: 416809 entries, 0 to 416808
Data columns (total 3 columns):
#
Column
Non-Null Count
Dtype
--- ----------------------0
Unnamed:- non-null int64
1
text
416809 non-null object
2
label
416809 non-null int64
dtypes: int64(2), object(1)
memory usage: 9.5+ MB
df.head(10)
Unnamed:
label-
0
text
0
i just feel really helpless and heavy hearted
1
ive enjoyed being able to slouch about relax a...
2
i gave up my internship with the dmrg and am f...
3
i dont know i feel so lost
4
i am a kindergarten teacher and i am thoroughl...
5
i was beginning to feel quite disheartened
6
i would think that whomever would be lucky eno...
7
i fear that they won t ever feel that deliciou...
8
im forever taking some time out to have a lie ...
9
i can still lose the weight without feeling de...
df.isnull().sum()
Unnamed: 0
text
0
0
label
dtype: int64
0
import re
# Data Cleaning:
def clean_text(text):
    """Normalize a raw text string before tokenization.

    Each removed span is replaced by a single space so neighbouring words
    never get glued together. In order, the function removes HTML-style
    tags, @mentions, URLs (``http...`` / ``www. ...``) and square-bracketed
    spans, then replaces every character that is not an ASCII letter or
    whitespace, lower-cases the text, and collapses whitespace runs.

    Args:
        text: The string to clean.

    Returns:
        The cleaned, lower-cased string with words separated by single
        spaces and no leading or trailing whitespace.
    """
    text = re.sub(r'<.*?>', " ", text)
    text = re.sub(r'@\w+', " ", text)
    text = re.sub(r'http\S+|www\.\S+', " ", text)
    text = re.sub(r'\[.*?\]', " ", text)
    # The raw-string prefix must sit outside the quotes. Written as
    # 'r[^a-zA-Z\s]' the pattern matched a literal "r" followed by one
    # non-letter character, so punctuation was left in place and a trailing
    # "r" was eaten from words such as "error!".
    text = re.sub(r'[^a-zA-Z\s]', " ", text)
    # Kept from the original pipeline; digits are already covered by the
    # substitution above, so this is a no-op safeguard.
    text = re.sub(r'\d+', " ", text)
    text = text.lower()
    # split()/join collapses every whitespace run and trims both ends.
    text = " ".join(text.split())
    return text
# Store the cleaned version of every text in a new column, then preview
# the first 20 rows.
df["clean_text"] = df["text"].apply(clean_text)
df.head(20)
Unnamed: 0
label \-
text
i just feel really helpless and heavy hearted
ive enjoyed being able to slouch about relax a...
i gave up my internship with the dmrg and am f...
i dont know i feel so lost
i am a kindergarten teacher and i am thoroughl...
i was beginning to feel quite disheartened
i would think that whomever would be lucky eno...
i fear that they won t ever feel that deliciou...
im forever taking some time out to have a lie ...
i can still lose the weight without feeling de...
i try to be nice though so if you get a bitchy...
im feeling a little like a damaged tree and th...
i have officially graduated im not feeling as ...
-
13
i feel like a jerk because the library student...
14
i feel my portfolio demonstrates how eager i a...
15
i may be more biased than the next because i h...
16
i didn t feel terrific
17
i miss all the others as well that feel that i...
18
i feel so stupid that i realise it so late
19
i saunter through the airport terminals feelin...
clean_text
i just feel really helpless and heavy hearted
ive enjoyed being able to slouch about relax a...
i gave up my internship with the dmrg and am f...
i dont know i feel so lost
i am a kindergarten teacher and i am thoroughl...
i was beginning to feel quite disheartened
i would think that whomever would be lucky eno...
i fear that they won t ever feel that deliciou...
im forever taking some time out to have a lie ...
i can still lose the weight without feeling de...
i try to be nice though so if you get a bitchy...
im feeling a little like a damaged tree and th...
i have officially graduated im not feeling as ...
i feel like a jerk because the library student...
i feel my portfolio demonstrates how eager i a...
i may be more biased than the next because i h...
i didn t feel terrific
i miss all the others as well that feel that i...
i feel so stupid that i realise it so late
i saunter through the airport terminals feelin...
#Tokenization and Lemmatization:
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
# One shared lemmatizer instance, reused for every row.
lemmatizer = WordNetLemmatizer()


def tokenize_and_lemm(text):
    """Tokenize *text* and lemmatize every token as a verb.

    Args:
        text: The string to process.

    Returns:
        The lemmatized tokens joined back together with single spaces.
    """
    tokens = word_tokenize(text)
    # NOTE(review): pos="v" treats every token as a verb regardless of its
    # real part of speech; the preview output in this notebook shows
    # "won t" becoming "win t" and "lost" becoming "lose". Confirm this is
    # intended.
    lem_tokens = [lemmatizer.lemmatize(token, pos="v") for token in tokens]
    return " ".join(lem_tokens)
# Lemmatize the cleaned text in place, then preview the first 20 rows.
df["clean_text"] = df["clean_text"].apply(tokenize_and_lemm)
df.head(20)
Unnamed: 0
label \-
text
i just feel really helpless and heavy hearted
ive enjoyed being able to slouch about relax a...
i gave up my internship with the dmrg and am f...
i dont know i feel so lost
i am a kindergarten teacher and i am thoroughl...
i was beginning to feel quite disheartened
i would think that whomever would be lucky eno...
i fear that they won t ever feel that deliciou...
im forever taking some time out to have a lie ...
i can still lose the weight without feeling de...
i try to be nice though so if you get a bitchy...
im feeling a little like a damaged tree and th...
i have officially graduated im not feeling as ...
i feel like a jerk because the library student...
i feel my portfolio demonstrates how eager i a...
i may be more biased than the next because i h...
i didn t feel terrific
i miss all the others as well that feel that i...
i feel so stupid that i realise it so late
i saunter through the airport terminals feelin...
clean_text
i just feel really helpless and heavy hearted
-
ive enjoy be able to slouch about relax and un...
i give up my internship with the dmrg and be f...
i dont know i feel so lose
i be a kindergarten teacher and i be thoroughl...
i be begin to feel quite dishearten
i would think that whomever would be lucky eno...
i fear that they win t ever feel that deliciou...
im forever take some time out to have a lie do...
i can still lose the weight without feel deprive
i try to be nice though so if you get a bitchy...
im feel a little like a damage tree and that m...
i have officially graduate im not feel as ecst...
i feel like a jerk because the library student...
i feel my portfolio demonstrate how eager i be...
i may be more bias than the next because i hav...
i didn t feel terrific
i miss all the others as well that feel that i...
i feel so stupid that i realise it so late
i saunter through the airport terminals feel t...
# Vectorization using Tf-idf Vectorizer:
from sklearn.feature_extraction.text import TfidfVectorizer
# Turn the lemmatized text into TF-IDF features, capped at 5000 terms with
# English stop words removed.
# NOTE(review): the vectorizer is fitted on the full dataset before any
# train/test split, so test rows influence the vocabulary and IDF weights.
# Consider fitting on the training rows only.
tfidf_vectorizer = TfidfVectorizer(
    max_features=5000,
    stop_words="english",
)
x = tfidf_vectorizer.fit_transform(df["clean_text"])
y = df["label"]
x.shape
(416809, 5000)
# Importing libraries:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report,accuracy_score
# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=42
)

# Applying Machine Learning Models:
## Using Logistic Regression Model:
# NOTE(review): recent scikit-learn releases deprecate the `multi_class`
# argument; confirm against the installed version before upgrading.
logreg = LogisticRegression(max_iter=1000, multi_class="ovr")
logreg.fit(X_train, y_train)
y_pred = logreg.predict(X_test)
# Arguments are (y_true, y_pred), matching the other model cells.
accuracy = accuracy_score(y_test, y_pred)
classification_rep = classification_report(y_test, y_pred)
accuracy, classification_rep
-,
'
precision
recall f1-score
support\n\n-\n-\n-\n-\n-\n-\n\n
accuracy-\n
macro avg-
nweighted avg-\n')
-\
accuracy-
y_test.shape
(83362,)
X_test.shape
(83362, 5000)
## USING SVM MODEL:
from sklearn.svm import SVC
from sklearn.metrics import classification_report,accuracy_score
from sklearn.model_selection import train_test_split
# Train the SVM on the first 5000 rows only, so its accuracy is measured on
# a different (much smaller) test set than the other models.
x_small = x[:5000]
y_small = y[:5000]
X_train, X_test, y_train, y_test = train_test_split(
    x_small, y_small, test_size=0.2, random_state=42
)
svm_model = SVC(kernel="linear")
svm_model.fit(X_train, y_train)
y_pred = svm_model.predict(X_test)
acc = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)
acc
0.797
X_test.shape
(1000, 5000)
y_test.shape
(1000,)
## USING DECISION TREE MODEL:
from sklearn.tree import DecisionTreeClassifier
# Re-split the full dataset; the SVM cell above left X_train/X_test bound
# to its 5000-row subset.
X_train, X_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=42
)
clf = DecisionTreeClassifier(random_state=42)
clf.fit(X_train, y_train)
print('Decision Tree model has been trained.')
Decision Tree model has been trained.
from sklearn.metrics import accuracy_score
# Score the decision tree on the held-out split and display the accuracy.
y_pred = clf.predict(X_test)
accu = accuracy_score(y_test, y_pred)
accu
y_test.shape
(83362,)
## USING RANDOM FOREST MODEL:
from sklearn.ensemble import RandomForestClassifier
# Fit a 100-tree random forest on the current training split.
rf_clf = RandomForestClassifier(
    n_estimators=100,
    random_state=42,
)
rf_clf.fit(X_train, y_train)
print("Random Forest model on Emotions dataset has been trained")
Random Forest model on Emotions dataset has been trained
# Score the random forest on the held-out split and display the accuracy.
rf_y_pred = rf_clf.predict(X_test)
acc_rf_clf = accuracy_score(y_test, rf_y_pred)
acc_rf_clf
df["label"].unique()
array([4, 0, 2, 1, 5, 3])
## USING KNN MODEL:
from sklearn.neighbors import KNeighborsClassifier
# Fit a 6-nearest-neighbours classifier on the current training split and
# predict labels for the held-out rows.
knn = KNeighborsClassifier(n_neighbors=6)
knn.fit(X_train, y_train)
y_pred = knn.predict(X_test)
y_pred
array([0, 1, 3, ..., 1, 1, 1])
from sklearn.metrics import accuracy_score
# Accuracy of the KNN predictions on the held-out split.
accuracy_score(y_test, y_pred)
df["text"][10]
'i try to be nice though so if you get a bitchy person on the phone or
at the window feel free to have a little fit and throw your pen at her
face'
import tensorflow as tf
from tensorflow import keras
# Same 80/20 split and seed as the classical models above.
X_train, X_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=42
)

# Small feed-forward network over the 5000 TF-IDF features.
model = keras.Sequential([
    # An explicit Input layer replaces `input_shape=` on the first Dense
    # layer, which is what the Keras UserWarning printed by this cell
    # recommends.
    keras.Input(shape=(5000,)),
    keras.layers.Dense(64, activation='relu'),
    keras.layers.Dropout(0.3),
    keras.layers.Dense(64, activation='relu'),
    keras.layers.Dropout(0.3),
    # Six output units: df["label"].unique() shows the labels 0 through 5.
    keras.layers.Dense(6, activation='softmax'),
])
# The sparse categorical loss takes the integer labels directly, with no
# one-hot encoding.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)
# 10% of the training rows are held back for per-epoch validation.
model.fit(
    X_train,
    y_train,
    epochs=6,
    batch_size=64,
    validation_split=0.1,
)
/usr/local/lib/python3.11/dist-packages/keras/src/layers/core/
dense.py:87: UserWarning: Do not pass an `input_shape`/`input_dim`
argument to a layer. When using Sequential models, prefer using an
`Input(shape)` object as the first layer in the model instead.
super().__init__(activity_regularizer=activity_regularizer,
**kwargs)
I0000 00:00:- gpu_device.cc:2022] Created
device /job:localhost/replica:0/task:0/device:GPU:0 with 15513 MB
memory: -> device: 0, name: Tesla P100-PCIE-16GB, pci bus id:
0000:00:04.0, compute capability: 6.0
Epoch 1/6
WARNING: All log messages before absl::InitializeLog() is called are
written to STDERR
I0000 00:00:- service.cc:148] XLA service
0x7c894c00b590 initialized for platform CUDA (this does not guarantee
that XLA will be used). Devices:
I0000 00:00:- service.cc:156]
StreamExecutor
device (0): Tesla P100-PCIE-16GB, Compute Capability 6.0
I0000 00:00:- cuda_dnn.cc:529] Loaded cuDNN
version 90300
1/4690 ━━━━━━━━━━━━━━━━━━━━ 3:54:15 3s/step - accuracy: 0.0625 loss: 1.7992
I0000 00:00:- device_compiler.h:188] Compiled
cluster using XLA! This line is logged at most once for the lifetime
of the process.
4690/4690 ━━━━━━━━━━━━━━━━━━━━ 255s 54ms/step - accuracy: 0.7764
loss: 0.6100 - val_accuracy: 0.8833 - val_loss: 0.2507
Epoch 2/6
4690/4690 ━━━━━━━━━━━━━━━━━━━━ 25s 5ms/step - accuracy:- - val_accuracy: 0.8844 - val_loss: 0.2431
Epoch 3/6
4690/4690 ━━━━━━━━━━━━━━━━━━━━ 15s 3ms/step - accuracy:- - val_accuracy: 0.8848 - val_loss: 0.2416
Epoch 4/6
4690/4690 ━━━━━━━━━━━━━━━━━━━━ 16s 3ms/step - accuracy:- - val_accuracy: 0.8816 - val_loss: 0.2476
Epoch 5/6
4690/4690 ━━━━━━━━━━━━━━━━━━━━ 15s 3ms/step - accuracy:- - val_accuracy: 0.8803 - val_loss: 0.2568
Epoch 6/6
4690/4690 ━━━━━━━━━━━━━━━━━━━━ 15s 3ms/step - accuracy:- - val_accuracy: 0.8812 - val_loss: 0.2607
loss:
loss:
loss:
loss:
loss:
# Evaluate the trained network on the held-out test split.
test_loss, test_acc = model.evaluate(X_test, y_test)
2606/2606 ━━━━━━━━━━━━━━━━━━━━ 27s 10ms/step - accuracy: 0.8812 loss: 0.2579
# Display the network's test accuracy.
test_acc
model.summary()
Model: "sequential"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳
━━━━━━━━━━━━━━━━━┓
┃ Layer (type)
┃ Output Shape
┃
Param # ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇
━━━━━━━━━━━━━━━━━┩
│ dense (Dense)
│ (None, 64)
│
320,064 │
├──────────────────────────────────────┼─────────────────────────────┼
─────────────────┤
│ dropout (Dropout)
│ (None, 64)
│
0 │
├──────────────────────────────────────┼─────────────────────────────┼
─────────────────┤
│ dense_1 (Dense)
│ (None, 64)
│
4,160 │
├──────────────────────────────────────┼─────────────────────────────┼
─────────────────┤
│ dropout_1 (Dropout)
│ (None, 64)
│
0 │
├──────────────────────────────────────┼─────────────────────────────┼
─────────────────┤
│ dense_2 (Dense)
│ (None, 6)
│
390 │
└──────────────────────────────────────┴─────────────────────────────┴
─────────────────┘
Total params: 973,844 (3.71 MB)
Trainable params: 324,614 (1.24 MB)
Non-trainable params: 0 (0.00 B)
Optimizer params: 649,230 (2.48 MB)
print(x.shape)
(416809, 5000)
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Accuracies recorded from the runs above, listed in the order the models
# were trained.
# NOTE(review): the SVM figure comes from the 5000-row subset, while the
# other models were split from the full dataset, so the bars are not
# measured on the same test set.
results = {
    "Model": ["LogisticReg.", "SVM", "Decision_T", "Rand_F.", "KNN", "ANN"],
    "Accuracy": [0.8827, 0.797, 0.8072, 0.8399, 0.6458, 0.8808],
}
df_results = pd.DataFrame(results)

# Bar chart of accuracy per model on a fixed 0-1 axis.
plt.figure(figsize=(10, 7))
sns.barplot(data=df_results, x="Model", y="Accuracy")
plt.title("Model Accuracy Comparison")
plt.ylim(0, 1)
plt.show()
# Overall, the Logistic Regression and ANN models performed better on this
# dataset than the other models.