from flask import Flask, request, jsonify
from flask_cors import CORS
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.optimizers import Adam
import os

np.seterr(invalid='ignore')  # Ignore invalid (NaN) floating-point warnings

app = Flask(__name__)
CORS(app)

# Load and prepare the data
df = pd.read_csv('complete_data.csv')
df['Date'] = pd.to_datetime(df['Date'], format='mixed')
df = df.sort_values(['Company', 'Date'])

# Create target variable (1 if the next day's close is higher, 0 otherwise)
df['Target'] = (df.groupby('Company')['Close'].shift(-1) > df['Close']).astype(int)

# Select features for the model
features = ['Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume', 'Sentiment']

# Handle null values per company by filling with each column's median
def handle_nulls(group):
    for feature in features:
        if group[feature].isnull().any():
            group[feature] = group[feature].fillna(group[feature].median())
    return group

df = df.groupby('Company').apply(handle_nulls).reset_index(drop=True)

# Normalize the features
scaler = MinMaxScaler()
df[features] = scaler.fit_transform(df[features])

# Build sliding windows of length `seq_length` and their next-step targets
def create_sequences(data, seq_length):
    X, y = [], []
    for i in range(len(data) - seq_length):
        X.append(data.iloc[i:(i + seq_length)][features].values)
        y.append(data.iloc[i + seq_length]['Target'])
    return np.array(X), np.array(y)

# Prepare data for each company
sequence_length = 10  # You can adjust this
company_models = {}
models_dir = 'models'
if not os.path.exists(models_dir):
    os.makedirs(models_dir)

def load_company_models():
    for file in os.listdir(models_dir):
        if file.endswith('.h5'):
            company = file.split('.h5')[0]
            model_path = os.path.join(models_dir, file)
            company_models[company] = load_model(model_path)

# Load any previously saved models when the app starts
load_company_models()

for company in df['Company'].unique():
    if pd.isna(company):
        print("Skipping NaN company name")
        continue
    print(f"Processing company: {company}")
    company_data = df[df['Company'] == company]
    print(f"Company data shape: {company_data.shape}")
    if len(company_data) <= sequence_length:
        print(f"Skipping {company} due to insufficient data")
        continue

    model_path = os.path.join(models_dir, f'{company}.h5')
    if os.path.exists(model_path):
        print(f"Loading existing model for {company}")
        company_models[company] = load_model(model_path)
    else:
        try:
            X, y = create_sequences(company_data, sequence_length)
            print(f"Sequences created. X shape: {X.shape}, y shape: {y.shape}")

            # Split into train and test sets
            X_train, X_test, y_train, y_test = train_test_split(
                X, y, test_size=0.2, random_state=42)

            # Create and compile the model
            model = Sequential([
                LSTM(50, activation='relu', input_shape=(sequence_length, len(features))),
                Dense(1, activation='sigmoid')
            ])
            model.compile(optimizer=Adam(learning_rate=0.001),
                          loss='binary_crossentropy',
                          metrics=['accuracy'])

            # Train the model
            model.fit(X_train, y_train, epochs=50, batch_size=32,
                      validation_split=0.1, verbose=0)

            # Evaluate the model
            loss, accuracy = model.evaluate(X_test, y_test, verbose=0)
            print(f"{company} - Test Accuracy: {accuracy:.4f}")

            # Save the model
            model.save(model_path)
            company_models[company] = model
        except Exception as e:
            print(f"Error processing {company}: {str(e)}")
            continue

def predict_stock_movement(company, latest_data):
    if company not in company_models:
        return "Company not found in the dataset."
    model = company_models[company]

    # Keep only the model features, in the same column order used for training
    latest_data_features = latest_data[features]

    # The feature columns in `df` were already MinMax-scaled above, so the rows
    # are used as-is; calling scaler.transform() on them again would scale the
    # data twice and distort the inputs.
    latest_data_scaled = latest_data_features.values

    # Reshape for LSTM input: (samples, time steps, features)
    sequence = latest_data_scaled.reshape(1, sequence_length, len(features))

    prediction = model.predict(sequence)[0, 0]
    return "Up" if prediction > 0.51 else "Down"


@app.route('/predict', methods=['POST'])
def predict():
    data = request.json
    company_name = data.get('company')
    if not company_name:
        return jsonify({"error": "Company name is required"}), 400

    latest_data = df[df['Company'] == company_name].tail(sequence_length)
    # Require a full window of `sequence_length` rows; fewer rows would make
    # the reshape in predict_stock_movement fail
    if len(latest_data) < sequence_length:
        return jsonify({"error": "Company not found or insufficient data"}), 404

    prediction = predict_stock_movement(company_name, latest_data)
    return jsonify({"company": company_name, "prediction": prediction})


if __name__ == '__main__':
    app.run(debug=True)
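
# Example usage (a sketch, not part of the app): with the server running
# locally on Flask's default port 5000, the /predict endpoint takes a JSON
# body containing a company name. "AAPL" below is only an illustrative
# placeholder; use a value that actually appears in the 'Company' column of
# complete_data.csv.
#
#   curl -X POST http://127.0.0.1:5000/predict \
#        -H "Content-Type: application/json" \
#        -d '{"company": "AAPL"}'
#
# Expected response shape: {"company": "AAPL", "prediction": "Up"} (or "Down").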