{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### **Twitter Sentiment Analysis**"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import re\n",
"from sklearn.feature_extraction.text import CountVectorizer\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.naive_bayes import MultinomialNB\n",
"import pickle\n",
"from sklearn.metrics import accuracy_score"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import warnings\n",
"warnings.filterwarnings('ignore')\n",
"\n",
"import pandas as pd "
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" clean_text | \n",
" category | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" when modi promised “minimum government maximum... | \n",
" -1.0 | \n",
"
\n",
" \n",
" 1 | \n",
" talk all the nonsense and continue all the dra... | \n",
" 0.0 | \n",
"
\n",
" \n",
" 2 | \n",
" what did just say vote for modi welcome bjp t... | \n",
" 1.0 | \n",
"
\n",
" \n",
" 3 | \n",
" asking his supporters prefix chowkidar their n... | \n",
" 1.0 | \n",
"
\n",
" \n",
" 4 | \n",
" answer who among these the most powerful world... | \n",
" 1.0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" clean_text category\n",
"0 when modi promised “minimum government maximum... -1.0\n",
"1 talk all the nonsense and continue all the dra... 0.0\n",
"2 what did just say vote for modi welcome bjp t... 1.0\n",
"3 asking his supporters prefix chowkidar their n... 1.0\n",
"4 answer who among these the most powerful world... 1.0"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Load the tweet dataset from a CSV located next to the notebook.\n",
"# NOTE(review): later cells select columns by the integer labels 2 and 3,\n",
"# which pandas assigns when a CSV is read with header=None. This call uses the\n",
"# default header handling - TODO confirm the intended file and `header=` value.\n",
"df = pd.read_csv('./Twitter_Data.csv' )\n",
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(75682, 3)"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# (rows, columns) of the loaded frame\n",
"df.shape"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**Drop unnecessary columns** - keep only the sentiment label and the tweet text."
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"# Keep only the columns labelled 2 and 3; reset_index(drop=True) replaces the\n",
"# index with a fresh 0..n-1 range and discards the old one.\n",
"df = df[[2,3]].reset_index(drop=True)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" 2 | \n",
" 3 | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" Positive | \n",
" im getting on borderlands and i will murder yo... | \n",
"
\n",
" \n",
" 1 | \n",
" Positive | \n",
" I am coming to the borders and I will kill you... | \n",
"
\n",
" \n",
" 2 | \n",
" Positive | \n",
" im getting on borderlands and i will kill you ... | \n",
"
\n",
" \n",
" 3 | \n",
" Positive | \n",
" im coming on borderlands and i will murder you... | \n",
"
\n",
" \n",
" 4 | \n",
" Positive | \n",
" im getting on borderlands 2 and i will murder ... | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" 2 3\n",
"0 Positive im getting on borderlands and i will murder yo...\n",
"1 Positive I am coming to the borders and I will kill you...\n",
"2 Positive im getting on borderlands and i will kill you ...\n",
"3 Positive im coming on borderlands and i will murder you...\n",
"4 Positive im getting on borderlands 2 and i will murder ..."
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Preview the frame after the column selection\n",
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" sentiments | \n",
" text | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" Positive | \n",
" im getting on borderlands and i will murder yo... | \n",
"
\n",
" \n",
" 1 | \n",
" Positive | \n",
" I am coming to the borders and I will kill you... | \n",
"
\n",
" \n",
" 2 | \n",
" Positive | \n",
" im getting on borderlands and i will kill you ... | \n",
"
\n",
" \n",
" 3 | \n",
" Positive | \n",
" im coming on borderlands and i will murder you... | \n",
"
\n",
" \n",
" 4 | \n",
" Positive | \n",
" im getting on borderlands 2 and i will murder ... | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" sentiments text\n",
"0 Positive im getting on borderlands and i will murder yo...\n",
"1 Positive I am coming to the borders and I will kill you...\n",
"2 Positive im getting on borderlands and i will kill you ...\n",
"3 Positive im coming on borderlands and i will murder you...\n",
"4 Positive im getting on borderlands 2 and i will murder ..."
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Give the two integer-labelled columns descriptive names. Rebinding `df`\n",
"# (instead of inplace=True) keeps the cell a plain, chainable expression.\n",
"df = df.rename(columns={2: \"sentiments\", 3: \"text\"})\n",
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"RangeIndex: 75682 entries, 0 to 75681\n",
"Data columns (total 2 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 sentiments 75682 non-null object\n",
" 1 text 74996 non-null object\n",
"dtypes: object(2)\n",
"memory usage: 1.2+ MB\n"
]
}
],
"source": [
"df.info()  # column dtypes and non-null counts per column"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"# Capture the per-column missing-value counts *before* dropping, and end the\n",
"# cell with them so they are actually displayed (a bare expression in the\n",
"# middle of a cell is evaluated and then discarded).\n",
"missing_per_column = df.isna().sum()\n",
"df = df.dropna()\n",
"missing_per_column"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" sentiments | \n",
" text | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" Positive | \n",
" im getting on borderlands and i will murder yo... | \n",
"
\n",
" \n",
" 1 | \n",
" Positive | \n",
" I am coming to the borders and I will kill you... | \n",
"
\n",
" \n",
" 2 | \n",
" Positive | \n",
" im getting on borderlands and i will kill you ... | \n",
"
\n",
" \n",
" 3 | \n",
" Positive | \n",
" im coming on borderlands and i will murder you... | \n",
"
\n",
" \n",
" 4 | \n",
" Positive | \n",
" im getting on borderlands 2 and i will murder ... | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" sentiments text\n",
"0 Positive im getting on borderlands and i will murder yo...\n",
"1 Positive I am coming to the borders and I will kill you...\n",
"2 Positive im getting on borderlands and i will kill you ...\n",
"3 Positive im coming on borderlands and i will murder you...\n",
"4 Positive im getting on borderlands 2 and i will murder ..."
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Preview the frame after rows with missing values were dropped\n",
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"def process_text(text):\n",
"    \"\"\"Normalise a tweet for bag-of-words vectorisation.\n",
"\n",
"    Lower-cases the text, strips URLs, @mentions and '#' signs, then removes\n",
"    every character that is neither an ASCII letter nor whitespace, so the\n",
"    spaces between words survive for the tokenizer.\n",
"\n",
"    Args:\n",
"        text: Raw tweet text (str).\n",
"\n",
"    Returns:\n",
"        The cleaned, lower-cased string. Removed spans are deleted rather\n",
"        than replaced, so runs of consecutive spaces may remain.\n",
"    \"\"\"\n",
"    text = text.lower()\n",
"    # Raw strings throughout: '\\S' is not a valid escape in a normal/f-string.\n",
"    text = re.sub(r'http\\S+', '', text)          # URLs\n",
"    text = re.sub(r'@[a-zA-Z0-9_]+', '', text)   # @mentions\n",
"    text = re.sub(r'#', '', text)                # keep the hashtag word, drop '#'\n",
"    # The negated class must contain '\\s' (whitespace). With '\\S' it matches\n",
"    # only whitespace, which deletes every space and glues the words together.\n",
"    text = re.sub(r'[^a-zA-Z\\s]', '', text)\n",
"    return text"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"# Run every tweet through process_text and store the result next to the raw text.\n",
"df['clean_text'] = df['text'].map(process_text)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" sentiments | \n",
" text | \n",
" clean_text | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" Positive | \n",
" im getting on borderlands and i will murder yo... | \n",
" imgettingonborderlandsandiwillmurderyouall, | \n",
"
\n",
" \n",
" 1 | \n",
" Positive | \n",
" I am coming to the borders and I will kill you... | \n",
" iamcomingtothebordersandiwillkillyouall, | \n",
"
\n",
" \n",
" 2 | \n",
" Positive | \n",
" im getting on borderlands and i will kill you ... | \n",
" imgettingonborderlandsandiwillkillyouall, | \n",
"
\n",
" \n",
" 3 | \n",
" Positive | \n",
" im coming on borderlands and i will murder you... | \n",
" imcomingonborderlandsandiwillmurderyouall, | \n",
"
\n",
" \n",
" 4 | \n",
" Positive | \n",
" im getting on borderlands 2 and i will murder ... | \n",
" imgettingonborderlands2andiwillmurderyoumeall, | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" sentiments text \\\n",
"0 Positive im getting on borderlands and i will murder yo... \n",
"1 Positive I am coming to the borders and I will kill you... \n",
"2 Positive im getting on borderlands and i will kill you ... \n",
"3 Positive im coming on borderlands and i will murder you... \n",
"4 Positive im getting on borderlands 2 and i will murder ... \n",
"\n",
" clean_text \n",
"0 imgettingonborderlandsandiwillmurderyouall, \n",
"1 iamcomingtothebordersandiwillkillyouall, \n",
"2 imgettingonborderlandsandiwillkillyouall, \n",
"3 imcomingonborderlandsandiwillmurderyouall, \n",
"4 imgettingonborderlands2andiwillmurderyoumeall, "
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Compare the raw text with the new clean_text column\n",
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"sentiments\n",
"Negative 22624\n",
"Positive 20932\n",
"Neutral 18393\n",
"Irrelevant 13047\n",
"Name: count, dtype: int64"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Class balance: number of rows per sentiment label\n",
"df['sentiments'].value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"# Bag-of-words features: max_features=5000 caps the vocabulary at the 5000\n",
"# most frequent terms in the corpus.\n",
"# NOTE(review): the vocabulary is fitted on all rows before the train/test\n",
"# split below, so held-out text influences the feature set - confirm this is\n",
"# acceptable for the evaluation.\n",
"count_vectorizer = CountVectorizer(max_features=5000)\n",
"count_matrix = count_vectorizer.fit_transform(df['clean_text'])"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
"# Hold out 20% of the rows for evaluation. The target must be the sentiment\n",
"# label: the cleaned text is already encoded in count_matrix, and using it as\n",
"# `y` would turn every distinct tweet into its own class.\n",
"X_train, X_test, y_train, y_test = train_test_split(\n",
"    count_matrix,\n",
"    df['sentiments'],\n",
"    test_size=0.2,\n",
"    random_state=42,  # fixed seed so the split is reproducible\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
"# nb_classifier = MultinomialNB()\n",
"# nb_classifier.fit(X_train , y_train)"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [],
"source": [
"# y_pred = nb_classifier.predict(X_test)\n",
"# accuracy = accuracy_score(y_test , y_pred)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.8"
}
},
"nbformat": 4,
"nbformat_minor": 2
}