{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "### **Twitter Sentiment Analysis**" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import re\n", "from sklearn.feature_extraction.text import CountVectorizer\n", "from sklearn.model_selection import train_test_split\n", "from sklearn.naive_bayes import MultinomialNB\n", "import pickle\n", "from sklearn.metrics import accuracy_score" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "import warnings\n", "warnings.filterwarnings('ignore')\n", "\n", "import pandas as pd " ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
clean_textcategory
0when modi promised “minimum government maximum...-1.0
1talk all the nonsense and continue all the dra...0.0
2what did just say vote for modi welcome bjp t...1.0
3asking his supporters prefix chowkidar their n...1.0
4answer who among these the most powerful world...1.0
\n", "
" ], "text/plain": [ " clean_text category\n", "0 when modi promised “minimum government maximum... -1.0\n", "1 talk all the nonsense and continue all the dra... 0.0\n", "2 what did just say vote for modi welcome bjp t... 1.0\n", "3 asking his supporters prefix chowkidar their n... 1.0\n", "4 answer who among these the most powerful world... 1.0" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Read the tweet dataset from the working directory and preview the first rows.\n", "# NOTE(review): the stored output of this cell shows named columns\n", "# (clean_text, category), while the cells below select integer labels 2 and 3.\n", "# Confirm which file / read_csv arguments were intended, then Restart & Run All.\n", "df = pd.read_csv('./Twitter_Data.csv')\n", "df.head()" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(75682, 3)" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# (rows, columns) of the loaded frame.\n", "df.shape" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**drop unnecessary columns**" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "# Keep only the columns labelled 2 and 3 and renumber the rows from 0.\n", "# NOTE(review): integer column labels presumably require the CSV to be read\n", "# without a header row - verify against the load cell.\n", "df = df.loc[:, [2, 3]].reset_index(drop=True)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
23
0Positiveim getting on borderlands and i will murder yo...
1PositiveI am coming to the borders and I will kill you...
2Positiveim getting on borderlands and i will kill you ...
3Positiveim coming on borderlands and i will murder you...
4Positiveim getting on borderlands 2 and i will murder ...
\n", "
" ], "text/plain": [ " 2 3\n", "0 Positive im getting on borderlands and i will murder yo...\n", "1 Positive I am coming to the borders and I will kill you...\n", "2 Positive im getting on borderlands and i will kill you ...\n", "3 Positive im coming on borderlands and i will murder you...\n", "4 Positive im getting on borderlands 2 and i will murder ..." ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Preview the selection; the columns still carry the labels 2 and 3 here.\n", "df.head(5)" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
sentimentstext
0Positiveim getting on borderlands and i will murder yo...
1PositiveI am coming to the borders and I will kill you...
2Positiveim getting on borderlands and i will kill you ...
3Positiveim coming on borderlands and i will murder you...
4Positiveim getting on borderlands 2 and i will murder ...
\n", "
" ], "text/plain": [ " sentiments text\n", "0 Positive im getting on borderlands and i will murder yo...\n", "1 Positive I am coming to the borders and I will kill you...\n", "2 Positive im getting on borderlands and i will kill you ...\n", "3 Positive im coming on borderlands and i will murder you...\n", "4 Positive im getting on borderlands 2 and i will murder ..." ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Give the two retained columns descriptive names.\n", "df = df.rename(columns={2: \"sentiments\", 3: \"text\"})\n", "df.head()" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "RangeIndex: 75682 entries, 0 to 75681\n", "Data columns (total 2 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", " 0 sentiments 75682 non-null object\n", " 1 text 74996 non-null object\n", "dtypes: object(2)\n", "memory usage: 1.2+ MB\n" ] } ], "source": [ "# Column dtypes and non-null counts.\n", "df.info()" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "# Null counts per column (not rendered: this is not the cell's last expression).\n", "df.isna().sum()\n", "# Discard every row that contains a missing value.\n", "df = df.dropna()" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
sentimentstext
0Positiveim getting on borderlands and i will murder yo...
1PositiveI am coming to the borders and I will kill you...
2Positiveim getting on borderlands and i will kill you ...
3Positiveim coming on borderlands and i will murder you...
4Positiveim getting on borderlands 2 and i will murder ...
\n", "
" ], "text/plain": [ " sentiments text\n", "0 Positive im getting on borderlands and i will murder yo...\n", "1 Positive I am coming to the borders and I will kill you...\n", "2 Positive im getting on borderlands and i will kill you ...\n", "3 Positive im coming on borderlands and i will murder you...\n", "4 Positive im getting on borderlands 2 and i will murder ..." ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.head()" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "def process_text(text):\n", "    \"\"\"Normalise a raw tweet for bag-of-words vectorisation.\n", "\n", "    Lower-cases the text, removes URLs, @mentions and '#' signs, drops every\n", "    character that is neither an ASCII letter nor whitespace, and collapses\n", "    runs of whitespace into single spaces.\n", "\n", "    Parameters\n", "    ----------\n", "    text : str\n", "        Raw tweet text.\n", "\n", "    Returns\n", "    -------\n", "    str\n", "        Cleaned text whose words are still separated by single spaces.\n", "    \"\"\"\n", "    text = text.lower()\n", "    text = re.sub(r'http\\S+', '', text)           # URLs\n", "    text = re.sub(r'@[a-zA-Z0-9_]+', '', text)    # @mentions\n", "    text = re.sub(r'#', '', text)                 # keep the hashtag word, drop '#'\n", "    # Lower-case \\s keeps whitespace. The previous pattern used \\S, which\n", "    # deleted every space and fused each tweet into a single token.\n", "    text = re.sub(r'[^a-zA-Z\\s]', '', text)\n", "    # Removals above leave gaps; squeeze them so words are single-spaced.\n", "    text = re.sub(r'\\s+', ' ', text).strip()\n", "    return text" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [], "source": [ "df['clean_text'] = df['text'].apply(process_text)" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
sentimentstextclean_text
0Positiveim getting on borderlands and i will murder yo...imgettingonborderlandsandiwillmurderyouall,
1PositiveI am coming to the borders and I will kill you...iamcomingtothebordersandiwillkillyouall,
2Positiveim getting on borderlands and i will kill you ...imgettingonborderlandsandiwillkillyouall,
3Positiveim coming on borderlands and i will murder you...imcomingonborderlandsandiwillmurderyouall,
4Positiveim getting on borderlands 2 and i will murder ...imgettingonborderlands2andiwillmurderyoumeall,
\n", "
" ], "text/plain": [ " sentiments text \\\n", "0 Positive im getting on borderlands and i will murder yo... \n", "1 Positive I am coming to the borders and I will kill you... \n", "2 Positive im getting on borderlands and i will kill you ... \n", "3 Positive im coming on borderlands and i will murder you... \n", "4 Positive im getting on borderlands 2 and i will murder ... \n", "\n", " clean_text \n", "0 imgettingonborderlandsandiwillmurderyouall, \n", "1 iamcomingtothebordersandiwillkillyouall, \n", "2 imgettingonborderlandsandiwillkillyouall, \n", "3 imcomingonborderlandsandiwillmurderyouall, \n", "4 imgettingonborderlands2andiwillmurderyoumeall, " ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.head()" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "sentiments\n", "Negative 22624\n", "Positive 20932\n", "Neutral 18393\n", "Irrelevant 13047\n", "Name: count, dtype: int64" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df['sentiments'].value_counts()" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [], "source": [ "count_vectorizer = CountVectorizer(max_features=5000)\n", "count_matrix = count_vectorizer.fit_transform(df['clean_text'])" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [], "source": [ "# Features are the bag-of-words counts; the target is the sentiment label.\n", "# (The target was previously df['clean_text'], i.e. the input text itself.)\n", "X_train, X_test, y_train, y_test = train_test_split(\n", "    count_matrix, df['sentiments'], test_size=0.2, random_state=42\n", ")" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [], "source": [ "# nb_classifier = MultinomialNB()\n", "# nb_classifier.fit(X_train , y_train)" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [], "source": [ "# y_pred = nb_classifier.predict(X_test)\n", "# accuracy = accuracy_score(y_test , y_pred)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": 
[], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.8" } }, "nbformat": 4, "nbformat_minor": 2 }