divyesh01 committed on
Commit
dc3c28f
·
verified ·
1 Parent(s): c81821d

Upload 6 files

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ twitter_sentiment.csv filter=lfs diff=lfs merge=lfs -text
Twitter_Sentiment_analysis.ipynb ADDED
@@ -0,0 +1,637 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "metadata": {},
6
+ "source": [
7
+ "### **Twitter Sentiment Analysis**"
8
+ ]
9
+ },
10
+ {
11
+ "cell_type": "code",
12
+ "execution_count": 1,
13
+ "metadata": {},
14
+ "outputs": [],
15
+ "source": [
16
+ "import re\n",
17
+ "from sklearn.feature_extraction.text import CountVectorizer\n",
18
+ "from sklearn.model_selection import train_test_split\n",
19
+ "from sklearn.naive_bayes import MultinomialNB\n",
20
+ "import pickle\n",
21
+ "from sklearn.metrics import accuracy_score"
22
+ ]
23
+ },
24
+ {
25
+ "cell_type": "code",
26
+ "execution_count": 2,
27
+ "metadata": {},
28
+ "outputs": [],
29
+ "source": [
30
+ "import warnings\n",
31
+ "warnings.filterwarnings('ignore')\n",
32
+ "\n",
33
+ "import pandas as pd "
34
+ ]
35
+ },
36
+ {
37
+ "cell_type": "code",
38
+ "execution_count": 20,
39
+ "metadata": {},
40
+ "outputs": [
41
+ {
42
+ "data": {
43
+ "text/html": [
44
+ "<div>\n",
45
+ "<style scoped>\n",
46
+ " .dataframe tbody tr th:only-of-type {\n",
47
+ " vertical-align: middle;\n",
48
+ " }\n",
49
+ "\n",
50
+ " .dataframe tbody tr th {\n",
51
+ " vertical-align: top;\n",
52
+ " }\n",
53
+ "\n",
54
+ " .dataframe thead th {\n",
55
+ " text-align: right;\n",
56
+ " }\n",
57
+ "</style>\n",
58
+ "<table border=\"1\" class=\"dataframe\">\n",
59
+ " <thead>\n",
60
+ " <tr style=\"text-align: right;\">\n",
61
+ " <th></th>\n",
62
+ " <th>clean_text</th>\n",
63
+ " <th>category</th>\n",
64
+ " </tr>\n",
65
+ " </thead>\n",
66
+ " <tbody>\n",
67
+ " <tr>\n",
68
+ " <th>0</th>\n",
69
+ " <td>when modi promised β€œminimum government maximum...</td>\n",
70
+ " <td>-1.0</td>\n",
71
+ " </tr>\n",
72
+ " <tr>\n",
73
+ " <th>1</th>\n",
74
+ " <td>talk all the nonsense and continue all the dra...</td>\n",
75
+ " <td>0.0</td>\n",
76
+ " </tr>\n",
77
+ " <tr>\n",
78
+ " <th>2</th>\n",
79
+ " <td>what did just say vote for modi welcome bjp t...</td>\n",
80
+ " <td>1.0</td>\n",
81
+ " </tr>\n",
82
+ " <tr>\n",
83
+ " <th>3</th>\n",
84
+ " <td>asking his supporters prefix chowkidar their n...</td>\n",
85
+ " <td>1.0</td>\n",
86
+ " </tr>\n",
87
+ " <tr>\n",
88
+ " <th>4</th>\n",
89
+ " <td>answer who among these the most powerful world...</td>\n",
90
+ " <td>1.0</td>\n",
91
+ " </tr>\n",
92
+ " </tbody>\n",
93
+ "</table>\n",
94
+ "</div>"
95
+ ],
96
+ "text/plain": [
97
+ " clean_text category\n",
98
+ "0 when modi promised β€œminimum government maximum... -1.0\n",
99
+ "1 talk all the nonsense and continue all the dra... 0.0\n",
100
+ "2 what did just say vote for modi welcome bjp t... 1.0\n",
101
+ "3 asking his supporters prefix chowkidar their n... 1.0\n",
102
+ "4 answer who among these the most powerful world... 1.0"
103
+ ]
104
+ },
105
+ "execution_count": 20,
106
+ "metadata": {},
107
+ "output_type": "execute_result"
108
+ }
109
+ ],
110
+ "source": [
111
+ "df = pd.read_csv('./Twitter_Data.csv' )\n",
112
+ "df.head()"
113
+ ]
114
+ },
115
+ {
116
+ "cell_type": "code",
117
+ "execution_count": 4,
118
+ "metadata": {},
119
+ "outputs": [
120
+ {
121
+ "data": {
122
+ "text/plain": [
123
+ "(75682, 3)"
124
+ ]
125
+ },
126
+ "execution_count": 4,
127
+ "metadata": {},
128
+ "output_type": "execute_result"
129
+ }
130
+ ],
131
+ "source": [
132
+ "df.shape"
133
+ ]
134
+ },
135
+ {
136
+ "cell_type": "markdown",
137
+ "metadata": {},
138
+ "source": [
139
+ "**drop unnecessary columns**"
140
+ ]
141
+ },
142
+ {
143
+ "cell_type": "code",
144
+ "execution_count": 5,
145
+ "metadata": {},
146
+ "outputs": [],
147
+ "source": [
148
+ "df = df[[2,3]].reset_index(drop=True)"
149
+ ]
150
+ },
151
+ {
152
+ "cell_type": "code",
153
+ "execution_count": 6,
154
+ "metadata": {},
155
+ "outputs": [
156
+ {
157
+ "data": {
158
+ "text/html": [
159
+ "<div>\n",
160
+ "<style scoped>\n",
161
+ " .dataframe tbody tr th:only-of-type {\n",
162
+ " vertical-align: middle;\n",
163
+ " }\n",
164
+ "\n",
165
+ " .dataframe tbody tr th {\n",
166
+ " vertical-align: top;\n",
167
+ " }\n",
168
+ "\n",
169
+ " .dataframe thead th {\n",
170
+ " text-align: right;\n",
171
+ " }\n",
172
+ "</style>\n",
173
+ "<table border=\"1\" class=\"dataframe\">\n",
174
+ " <thead>\n",
175
+ " <tr style=\"text-align: right;\">\n",
176
+ " <th></th>\n",
177
+ " <th>2</th>\n",
178
+ " <th>3</th>\n",
179
+ " </tr>\n",
180
+ " </thead>\n",
181
+ " <tbody>\n",
182
+ " <tr>\n",
183
+ " <th>0</th>\n",
184
+ " <td>Positive</td>\n",
185
+ " <td>im getting on borderlands and i will murder yo...</td>\n",
186
+ " </tr>\n",
187
+ " <tr>\n",
188
+ " <th>1</th>\n",
189
+ " <td>Positive</td>\n",
190
+ " <td>I am coming to the borders and I will kill you...</td>\n",
191
+ " </tr>\n",
192
+ " <tr>\n",
193
+ " <th>2</th>\n",
194
+ " <td>Positive</td>\n",
195
+ " <td>im getting on borderlands and i will kill you ...</td>\n",
196
+ " </tr>\n",
197
+ " <tr>\n",
198
+ " <th>3</th>\n",
199
+ " <td>Positive</td>\n",
200
+ " <td>im coming on borderlands and i will murder you...</td>\n",
201
+ " </tr>\n",
202
+ " <tr>\n",
203
+ " <th>4</th>\n",
204
+ " <td>Positive</td>\n",
205
+ " <td>im getting on borderlands 2 and i will murder ...</td>\n",
206
+ " </tr>\n",
207
+ " </tbody>\n",
208
+ "</table>\n",
209
+ "</div>"
210
+ ],
211
+ "text/plain": [
212
+ " 2 3\n",
213
+ "0 Positive im getting on borderlands and i will murder yo...\n",
214
+ "1 Positive I am coming to the borders and I will kill you...\n",
215
+ "2 Positive im getting on borderlands and i will kill you ...\n",
216
+ "3 Positive im coming on borderlands and i will murder you...\n",
217
+ "4 Positive im getting on borderlands 2 and i will murder ..."
218
+ ]
219
+ },
220
+ "execution_count": 6,
221
+ "metadata": {},
222
+ "output_type": "execute_result"
223
+ }
224
+ ],
225
+ "source": [
226
+ "df.head()"
227
+ ]
228
+ },
229
+ {
230
+ "cell_type": "code",
231
+ "execution_count": 7,
232
+ "metadata": {},
233
+ "outputs": [
234
+ {
235
+ "data": {
236
+ "text/html": [
237
+ "<div>\n",
238
+ "<style scoped>\n",
239
+ " .dataframe tbody tr th:only-of-type {\n",
240
+ " vertical-align: middle;\n",
241
+ " }\n",
242
+ "\n",
243
+ " .dataframe tbody tr th {\n",
244
+ " vertical-align: top;\n",
245
+ " }\n",
246
+ "\n",
247
+ " .dataframe thead th {\n",
248
+ " text-align: right;\n",
249
+ " }\n",
250
+ "</style>\n",
251
+ "<table border=\"1\" class=\"dataframe\">\n",
252
+ " <thead>\n",
253
+ " <tr style=\"text-align: right;\">\n",
254
+ " <th></th>\n",
255
+ " <th>sentiments</th>\n",
256
+ " <th>text</th>\n",
257
+ " </tr>\n",
258
+ " </thead>\n",
259
+ " <tbody>\n",
260
+ " <tr>\n",
261
+ " <th>0</th>\n",
262
+ " <td>Positive</td>\n",
263
+ " <td>im getting on borderlands and i will murder yo...</td>\n",
264
+ " </tr>\n",
265
+ " <tr>\n",
266
+ " <th>1</th>\n",
267
+ " <td>Positive</td>\n",
268
+ " <td>I am coming to the borders and I will kill you...</td>\n",
269
+ " </tr>\n",
270
+ " <tr>\n",
271
+ " <th>2</th>\n",
272
+ " <td>Positive</td>\n",
273
+ " <td>im getting on borderlands and i will kill you ...</td>\n",
274
+ " </tr>\n",
275
+ " <tr>\n",
276
+ " <th>3</th>\n",
277
+ " <td>Positive</td>\n",
278
+ " <td>im coming on borderlands and i will murder you...</td>\n",
279
+ " </tr>\n",
280
+ " <tr>\n",
281
+ " <th>4</th>\n",
282
+ " <td>Positive</td>\n",
283
+ " <td>im getting on borderlands 2 and i will murder ...</td>\n",
284
+ " </tr>\n",
285
+ " </tbody>\n",
286
+ "</table>\n",
287
+ "</div>"
288
+ ],
289
+ "text/plain": [
290
+ " sentiments text\n",
291
+ "0 Positive im getting on borderlands and i will murder yo...\n",
292
+ "1 Positive I am coming to the borders and I will kill you...\n",
293
+ "2 Positive im getting on borderlands and i will kill you ...\n",
294
+ "3 Positive im coming on borderlands and i will murder you...\n",
295
+ "4 Positive im getting on borderlands 2 and i will murder ..."
296
+ ]
297
+ },
298
+ "execution_count": 7,
299
+ "metadata": {},
300
+ "output_type": "execute_result"
301
+ }
302
+ ],
303
+ "source": [
304
+ "# df.columns = ['sentiments','text']\n",
305
+ "df.rename(columns={2 : \"sentiments\" , 3 : \"text\"} , inplace= True)\n",
306
+ "df.head()"
307
+ ]
308
+ },
309
+ {
310
+ "cell_type": "code",
311
+ "execution_count": 8,
312
+ "metadata": {},
313
+ "outputs": [
314
+ {
315
+ "name": "stdout",
316
+ "output_type": "stream",
317
+ "text": [
318
+ "<class 'pandas.core.frame.DataFrame'>\n",
319
+ "RangeIndex: 75682 entries, 0 to 75681\n",
320
+ "Data columns (total 2 columns):\n",
321
+ " # Column Non-Null Count Dtype \n",
322
+ "--- ------ -------------- ----- \n",
323
+ " 0 sentiments 75682 non-null object\n",
324
+ " 1 text 74996 non-null object\n",
325
+ "dtypes: object(2)\n",
326
+ "memory usage: 1.2+ MB\n"
327
+ ]
328
+ }
329
+ ],
330
+ "source": [
331
+ "df.info() # to see data types"
332
+ ]
333
+ },
334
+ {
335
+ "cell_type": "code",
336
+ "execution_count": 9,
337
+ "metadata": {},
338
+ "outputs": [],
339
+ "source": [
340
+ "df.isna().sum()\n",
341
+ "df.dropna(inplace= True)"
342
+ ]
343
+ },
344
+ {
345
+ "cell_type": "code",
346
+ "execution_count": 10,
347
+ "metadata": {},
348
+ "outputs": [
349
+ {
350
+ "data": {
351
+ "text/html": [
352
+ "<div>\n",
353
+ "<style scoped>\n",
354
+ " .dataframe tbody tr th:only-of-type {\n",
355
+ " vertical-align: middle;\n",
356
+ " }\n",
357
+ "\n",
358
+ " .dataframe tbody tr th {\n",
359
+ " vertical-align: top;\n",
360
+ " }\n",
361
+ "\n",
362
+ " .dataframe thead th {\n",
363
+ " text-align: right;\n",
364
+ " }\n",
365
+ "</style>\n",
366
+ "<table border=\"1\" class=\"dataframe\">\n",
367
+ " <thead>\n",
368
+ " <tr style=\"text-align: right;\">\n",
369
+ " <th></th>\n",
370
+ " <th>sentiments</th>\n",
371
+ " <th>text</th>\n",
372
+ " </tr>\n",
373
+ " </thead>\n",
374
+ " <tbody>\n",
375
+ " <tr>\n",
376
+ " <th>0</th>\n",
377
+ " <td>Positive</td>\n",
378
+ " <td>im getting on borderlands and i will murder yo...</td>\n",
379
+ " </tr>\n",
380
+ " <tr>\n",
381
+ " <th>1</th>\n",
382
+ " <td>Positive</td>\n",
383
+ " <td>I am coming to the borders and I will kill you...</td>\n",
384
+ " </tr>\n",
385
+ " <tr>\n",
386
+ " <th>2</th>\n",
387
+ " <td>Positive</td>\n",
388
+ " <td>im getting on borderlands and i will kill you ...</td>\n",
389
+ " </tr>\n",
390
+ " <tr>\n",
391
+ " <th>3</th>\n",
392
+ " <td>Positive</td>\n",
393
+ " <td>im coming on borderlands and i will murder you...</td>\n",
394
+ " </tr>\n",
395
+ " <tr>\n",
396
+ " <th>4</th>\n",
397
+ " <td>Positive</td>\n",
398
+ " <td>im getting on borderlands 2 and i will murder ...</td>\n",
399
+ " </tr>\n",
400
+ " </tbody>\n",
401
+ "</table>\n",
402
+ "</div>"
403
+ ],
404
+ "text/plain": [
405
+ " sentiments text\n",
406
+ "0 Positive im getting on borderlands and i will murder yo...\n",
407
+ "1 Positive I am coming to the borders and I will kill you...\n",
408
+ "2 Positive im getting on borderlands and i will kill you ...\n",
409
+ "3 Positive im coming on borderlands and i will murder you...\n",
410
+ "4 Positive im getting on borderlands 2 and i will murder ..."
411
+ ]
412
+ },
413
+ "execution_count": 10,
414
+ "metadata": {},
415
+ "output_type": "execute_result"
416
+ }
417
+ ],
418
+ "source": [
419
+ "df.head()"
420
+ ]
421
+ },
422
+ {
423
+ "cell_type": "code",
424
+ "execution_count": 11,
425
+ "metadata": {},
426
+ "outputs": [],
427
+ "source": [
428
+ "def process_text(text):\n",
429
+ " text = text.lower()\n",
430
+ " text = re.sub(r'http\\S+','',text)\n",
431
+ " text = re.sub(r'@[a-zA-Z0-9_]+','',text)\n",
432
+ " text = re.sub(r'#','',text)\n",
433
+ " text = re.sub(r'[^a-zA-Z\\s]','',text)\n",
434
+ " return text"
435
+ ]
436
+ },
437
+ {
438
+ "cell_type": "code",
439
+ "execution_count": 12,
440
+ "metadata": {},
441
+ "outputs": [],
442
+ "source": [
443
+ "df['clean_text'] = df['text'].apply(process_text)"
444
+ ]
445
+ },
446
+ {
447
+ "cell_type": "code",
448
+ "execution_count": 13,
449
+ "metadata": {},
450
+ "outputs": [
451
+ {
452
+ "data": {
453
+ "text/html": [
454
+ "<div>\n",
455
+ "<style scoped>\n",
456
+ " .dataframe tbody tr th:only-of-type {\n",
457
+ " vertical-align: middle;\n",
458
+ " }\n",
459
+ "\n",
460
+ " .dataframe tbody tr th {\n",
461
+ " vertical-align: top;\n",
462
+ " }\n",
463
+ "\n",
464
+ " .dataframe thead th {\n",
465
+ " text-align: right;\n",
466
+ " }\n",
467
+ "</style>\n",
468
+ "<table border=\"1\" class=\"dataframe\">\n",
469
+ " <thead>\n",
470
+ " <tr style=\"text-align: right;\">\n",
471
+ " <th></th>\n",
472
+ " <th>sentiments</th>\n",
473
+ " <th>text</th>\n",
474
+ " <th>clean_text</th>\n",
475
+ " </tr>\n",
476
+ " </thead>\n",
477
+ " <tbody>\n",
478
+ " <tr>\n",
479
+ " <th>0</th>\n",
480
+ " <td>Positive</td>\n",
481
+ " <td>im getting on borderlands and i will murder yo...</td>\n",
482
+ " <td>imgettingonborderlandsandiwillmurderyouall,</td>\n",
483
+ " </tr>\n",
484
+ " <tr>\n",
485
+ " <th>1</th>\n",
486
+ " <td>Positive</td>\n",
487
+ " <td>I am coming to the borders and I will kill you...</td>\n",
488
+ " <td>iamcomingtothebordersandiwillkillyouall,</td>\n",
489
+ " </tr>\n",
490
+ " <tr>\n",
491
+ " <th>2</th>\n",
492
+ " <td>Positive</td>\n",
493
+ " <td>im getting on borderlands and i will kill you ...</td>\n",
494
+ " <td>imgettingonborderlandsandiwillkillyouall,</td>\n",
495
+ " </tr>\n",
496
+ " <tr>\n",
497
+ " <th>3</th>\n",
498
+ " <td>Positive</td>\n",
499
+ " <td>im coming on borderlands and i will murder you...</td>\n",
500
+ " <td>imcomingonborderlandsandiwillmurderyouall,</td>\n",
501
+ " </tr>\n",
502
+ " <tr>\n",
503
+ " <th>4</th>\n",
504
+ " <td>Positive</td>\n",
505
+ " <td>im getting on borderlands 2 and i will murder ...</td>\n",
506
+ " <td>imgettingonborderlands2andiwillmurderyoumeall,</td>\n",
507
+ " </tr>\n",
508
+ " </tbody>\n",
509
+ "</table>\n",
510
+ "</div>"
511
+ ],
512
+ "text/plain": [
513
+ " sentiments text \\\n",
514
+ "0 Positive im getting on borderlands and i will murder yo... \n",
515
+ "1 Positive I am coming to the borders and I will kill you... \n",
516
+ "2 Positive im getting on borderlands and i will kill you ... \n",
517
+ "3 Positive im coming on borderlands and i will murder you... \n",
518
+ "4 Positive im getting on borderlands 2 and i will murder ... \n",
519
+ "\n",
520
+ " clean_text \n",
521
+ "0 imgettingonborderlandsandiwillmurderyouall, \n",
522
+ "1 iamcomingtothebordersandiwillkillyouall, \n",
523
+ "2 imgettingonborderlandsandiwillkillyouall, \n",
524
+ "3 imcomingonborderlandsandiwillmurderyouall, \n",
525
+ "4 imgettingonborderlands2andiwillmurderyoumeall, "
526
+ ]
527
+ },
528
+ "execution_count": 13,
529
+ "metadata": {},
530
+ "output_type": "execute_result"
531
+ }
532
+ ],
533
+ "source": [
534
+ "df.head()"
535
+ ]
536
+ },
537
+ {
538
+ "cell_type": "code",
539
+ "execution_count": 14,
540
+ "metadata": {},
541
+ "outputs": [
542
+ {
543
+ "data": {
544
+ "text/plain": [
545
+ "sentiments\n",
546
+ "Negative 22624\n",
547
+ "Positive 20932\n",
548
+ "Neutral 18393\n",
549
+ "Irrelevant 13047\n",
550
+ "Name: count, dtype: int64"
551
+ ]
552
+ },
553
+ "execution_count": 14,
554
+ "metadata": {},
555
+ "output_type": "execute_result"
556
+ }
557
+ ],
558
+ "source": [
559
+ "df['sentiments'].value_counts()"
560
+ ]
561
+ },
562
+ {
563
+ "cell_type": "code",
564
+ "execution_count": 15,
565
+ "metadata": {},
566
+ "outputs": [],
567
+ "source": [
568
+ "count_vectorizer = CountVectorizer(max_features=5000)\n",
569
+ "count_matrix = count_vectorizer.fit_transform(df['clean_text'])"
570
+ ]
571
+ },
572
+ {
573
+ "cell_type": "code",
574
+ "execution_count": 16,
575
+ "metadata": {},
576
+ "outputs": [],
577
+ "source": [
578
+ "X_train , X_test , y_train , y_test = train_test_split(count_matrix, df['sentiments'],test_size=0.2 , random_state=42)"
579
+ ]
580
+ },
581
+ {
582
+ "cell_type": "code",
583
+ "execution_count": 17,
584
+ "metadata": {},
585
+ "outputs": [],
586
+ "source": [
587
+ "# nb_classifier = MultinomialNB()\n",
588
+ "# nb_classifier.fit(X_train , y_train)"
589
+ ]
590
+ },
591
+ {
592
+ "cell_type": "code",
593
+ "execution_count": 18,
594
+ "metadata": {},
595
+ "outputs": [],
596
+ "source": [
597
+ "# y_pred = nb_classifier.predict(X_test)\n",
598
+ "# accuracy = accuracy_score(y_test , y_pred)"
599
+ ]
600
+ },
601
+ {
602
+ "cell_type": "code",
603
+ "execution_count": null,
604
+ "metadata": {},
605
+ "outputs": [],
606
+ "source": []
607
+ },
608
+ {
609
+ "cell_type": "code",
610
+ "execution_count": null,
611
+ "metadata": {},
612
+ "outputs": [],
613
+ "source": []
614
+ }
615
+ ],
616
+ "metadata": {
617
+ "kernelspec": {
618
+ "display_name": "Python 3",
619
+ "language": "python",
620
+ "name": "python3"
621
+ },
622
+ "language_info": {
623
+ "codemirror_mode": {
624
+ "name": "ipython",
625
+ "version": 3
626
+ },
627
+ "file_extension": ".py",
628
+ "mimetype": "text/x-python",
629
+ "name": "python",
630
+ "nbconvert_exporter": "python",
631
+ "pygments_lexer": "ipython3",
632
+ "version": "3.12.8"
633
+ }
634
+ },
635
+ "nbformat": 4,
636
+ "nbformat_minor": 2
637
+ }
Twitter_sentiment.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ from sklearn.feature_extraction.text import CountVectorizer
3
+ from sklearn.model_selection import train_test_split
4
+ from sklearn.naive_bayes import MultinomialNB
5
+ from sklearn.metrics import accuracy_score
6
+ import pandas as pd
7
+ import warnings
8
+ import pickle
9
+
10
+ warnings.filterwarnings('ignore')
11
+
12
# Load the dataset. The CSV is read without a header row, so the columns
# carry integer labels; the first column is consumed as the index and then
# thrown away by reset_index(drop=True). Columns 2 and 3 are kept and
# renamed to "sentiments" and "text".
df = (
    pd.read_csv('./twitter_sentiment.csv', header=None, index_col=0)[[2, 3]]
    .reset_index(drop=True)
    .rename(columns={2: "sentiments", 3: "text"})
)

# Rows with a missing value in either column are removed in place; the
# index is left as-is afterwards (gaps are not renumbered).
df.dropna(inplace=True)
19
+
20
+ # Preprocess text
21
def process_text(text):
    """Return *text* lowercased and reduced to ASCII letters and whitespace.

    The removals run in a fixed order: anything starting with ``http`` up
    to the next whitespace, ``@mentions``, ``#`` signs (the hashtag word
    itself is kept), and finally every character that is neither an ASCII
    letter nor whitespace. Whitespace is never collapsed, so a removed
    token leaves its surrounding spaces behind.
    """
    # Order matters: URLs and mentions have to be removed whole before the
    # last pattern strips their punctuation and leaves the words behind.
    removal_patterns = (
        r'http\S+',
        r'@[a-zA-Z0-9_]+',
        r'#',
        r'[^a-zA-Z\s]',
    )
    cleaned = text.lower()
    for pattern in removal_patterns:
        cleaned = re.sub(pattern, '', cleaned)
    return cleaned
28
+
29
# Clean every tweet with process_text before any features are built.
df['clean_text'] = df['text'].apply(process_text)

# Split the text first. Fitting the vectorizer on the full corpus before
# splitting would let the held-out tweets influence which terms make it
# into the vocabulary, which makes the reported accuracy optimistic.
text_train, text_test, y_train, y_test = train_test_split(
    df['clean_text'], df['sentiments'], test_size=0.2, random_state=42
)

# Vectorize text: the vocabulary is learned from the training split only
# and merely applied to the test split.
count_vectorizer = CountVectorizer(max_features=5000)
X_train = count_vectorizer.fit_transform(text_train)
X_test = count_vectorizer.transform(text_test)

# Train model
nb_classifier = MultinomialNB()
nb_classifier.fit(X_train, y_train)

# Predict and evaluate on the held-out split
y_pred = nb_classifier.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

# Persist the fitted vectorizer and classifier under the file names that
# app.py opens.
with open('count_vectorizer.pkl', 'wb') as vectorizer_file:
    pickle.dump(count_vectorizer, vectorizer_file)

with open('nb_classifier.pkl', 'wb') as classifier_file:
    pickle.dump(nb_classifier, classifier_file)
app.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pickle
3
+ import re
4
+ from sklearn.feature_extraction.text import CountVectorizer
5
+
6
def _load_pickle(path):
    """Open *path* in binary mode and return the object unpickled from it."""
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# Both artefacts are read each time this script is executed.
count_vectorizer = _load_pickle('count_vectorizer.pkl')
nb_classifier = _load_pickle('nb_classifier.pkl')
11
+
12
def process_text(text):
    """Lowercase *text* and strip it down to ASCII letters and whitespace.

    Applied to the user's input before it is handed to the loaded
    vectorizer. In order, the function removes: ``http...`` runs up to the
    next whitespace, ``@mentions``, ``#`` characters, and then any
    character that is not an ASCII letter or whitespace. Spaces around
    removed tokens are left untouched.
    """
    # Sequence is significant: the catch-all last pattern would otherwise
    # break URLs and mentions into leftover words.
    removal_patterns = (
        r'http\S+',
        r'@[a-zA-Z0-9_]+',
        r'#',
        r'[^a-zA-Z\s]',
    )
    cleaned = text.lower()
    for pattern in removal_patterns:
        cleaned = re.sub(pattern, '', cleaned)
    return cleaned
19
+
20
# Display text for each predicted label. The emoji were mis-encoded
# (UTF-8 bytes rendered through a single-byte codepage) and are restored here.
sentiment_mapping = {
    "Negative": "Negative 😔",
    "Positive": "Positive 😊",
    "Neutral": "Neutral 🙄",
    "Irrelevant": "Irrelevant 🤷‍♂️",
}
26
+
27
def main():
    """Render the page and show the predicted sentiment for the entered tweet.

    When the "Predict" button is pressed, the input is cleaned with
    ``process_text``, vectorized with ``count_vectorizer``, classified by
    ``nb_classifier`` and the label is translated through
    ``sentiment_mapping`` (falling back to "Unknown Sentiment"). Blank
    input produces a warning instead of a prediction.
    """
    # Only the third (widest) column is used; it holds the logo.
    # NOTE(review): indentation was lost in the source this was recovered
    # from - confirm that only the image belongs inside the column.
    _, _, logo_col, _ = st.columns([1, 1, 3, 1])
    with logo_col:
        st.image("./image/pngwing.com (1).png", width=100)
    st.title("Twitter Sentiment Classifier")
    st.write("Enter twitter tweet below :")
    input_text = st.text_area("Input Text :", "")
    if st.button("Predict"):
        if not input_text.strip():
            # A prediction on empty text would not reflect anything the
            # user typed, so stop here.
            st.warning("Please enter some text before predicting.")
            return

        cleaned_text = process_text(input_text)
        vectorizer_text = count_vectorizer.transform([cleaned_text])
        sentiment_prediction = nb_classifier.predict(vectorizer_text)[0]

        predicted_sentiment = sentiment_mapping.get(sentiment_prediction, "Unknown Sentiment")

        st.write("Predicted Sentiment :")
        st.title(predicted_sentiment)


if __name__ == "__main__":
    main()
count_vectorizer.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0283cf923e981e93aad9148b59bd974d6658ffb49fff332abd81bf0d5da693c6
3
+ size 141069
nb_classifier.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:020b96fa1ba1660e47bf4634550a7f019a3487205c07aaa4b62efc02f9281ad2
3
+ size 320788
twitter_sentiment.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:17a94670bd8955bc3f5a740cd72797cd763c1ae5fdfea5eef619c1a28435c203
3
+ size 10489426