Spaces:

divyesh01
/

twitter_sentiment_analysis

Sleeping

twitter_sentiment_analysis / Twitter_sentiment.py

Upload 6 files

dc3c28f verified about 1 month ago

1.59 kB

	import re
	from sklearn.feature_extraction.text import CountVectorizer
	from sklearn.model_selection import train_test_split
	from sklearn.naive_bayes import MultinomialNB
	from sklearn.metrics import accuracy_score
	import pandas as pd
	import warnings
	import pickle

	# Silence every warning category for the remainder of the run.
	warnings.filterwarnings('ignore')

	# Build the working frame in one pass: read the header-less CSV with its
	# first column as the index, keep only the columns labelled 2 and 3, discard
	# the file's index, rename the two columns to "sentiments" and "text", and
	# drop every row that has a missing value in either of them.
	df = (
	    pd.read_csv('./twitter_sentiment.csv', header=None, index_col=0)
	    .loc[:, [2, 3]]
	    .reset_index(drop=True)
	    .rename(columns={2: "sentiments", 3: "text"})
	    .dropna()
	)

	# Preprocess text
	# Compiled once at import time: process_text is applied to every row of the
	# dataset, so the patterns should not be looked up again on each call.
	_URL_RE = re.compile(r'http\S+')
	_MENTION_RE = re.compile(r'@[a-zA-Z0-9_]+')
	_NON_ALPHA_RE = re.compile(r'[^a-zA-Z\s]')


	def process_text(text):
	    """Normalize a raw tweet into lowercase, letters-and-whitespace-only text.

	    Steps, in order: lowercase, remove ``http...`` URLs, remove ``@mentions``,
	    then remove every character that is not an ASCII letter or whitespace.
	    Whitespace is not collapsed, so removed tokens leave their surrounding
	    spaces behind.

	    Args:
	        text: The raw tweet. Any value that is not a ``str`` (``None``, a
	            float NaN from a missing cell, a numeric cell) is treated as
	            having no text.

	    Returns:
	        The cleaned string, or ``""`` when ``text`` is not a string.
	    """
	    if not isinstance(text, str):
	        # Previously this raised AttributeError on ``.lower()``.
	        return ''

	    text = text.lower()
	    # URLs and mentions must go before the catch-all filter; otherwise their
	    # letters would survive as noise tokens.
	    text = _URL_RE.sub('', text)
	    text = _MENTION_RE.sub('', text)
	    # This also removes '#', so hashtags keep their word but lose the marker;
	    # no separate '#' substitution is needed.
	    return _NON_ALPHA_RE.sub('', text)

	# Clean every tweet up front. process_text is applied row by row, so doing
	# this before the split shares nothing between the partitions.
	df['clean_text'] = df['text'].apply(process_text)

	# Split data BEFORE vectorizing, so the held-out tweets take no part in
	# choosing the vocabulary. Same test_size and random_state as before.
	train_text, test_text, y_train, y_test = train_test_split(
	    df['clean_text'], df['sentiments'], test_size=0.2, random_state=42
	)

	# Vectorize text: learn the vocabulary (capped at 5000 features) from the
	# training tweets only, then reuse that fitted vocabulary for the test set.
	count_vectorizer = CountVectorizer(max_features=5000)
	X_train = count_vectorizer.fit_transform(train_text)
	X_test = count_vectorizer.transform(test_text)

	# Train model
	nb_classifier = MultinomialNB()
	nb_classifier.fit(X_train, y_train)

	# Predict and evaluate on the held-out tweets.
	y_pred = nb_classifier.predict(X_test)
	accuracy = accuracy_score(y_test, y_pred)
	print("Accuracy:", accuracy)

	# Persist the fitted vectorizer and classifier as two separate pickle files.
	# NOTE: pickle files must only ever be loaded from a trusted source.
	with open('count_vectorizer.pkl', 'wb') as vectorizer_file:
	    pickle.dump(count_vectorizer, vectorizer_file)

	with open('nb_classifier.pkl', 'wb') as classifier_file:
	    pickle.dump(nb_classifier, classifier_file)