Upload 9 files
Browse files- app.py +89 -0
- datasets/cleaned_housePrice.csv +0 -0
- datasets/housePrice.csv +0 -0
- models/GradientBoostingRegressor_pipeline.joblib +3 -0
- models/KernelRidge_pipeline.joblib +3 -0
- models/XGBoostRegressor_pipeline.joblib +3 -0
- models/tehran_house_price_preprocessor.joblib +3 -0
- requirements.txt +0 -0
- utils.py +83 -0
app.py
ADDED
@@ -0,0 +1,89 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import pandas as pd
|
3 |
+
import joblib
|
4 |
+
from sklearn.metrics import r2_score
|
5 |
+
from utils import data_cleaning
|
6 |
+
|
7 |
+
# Load Dataset and Models.
# NOTE(review): the original used absolute Windows paths
# (C:/Users/Afshin/Desktop/...), which break as soon as the app is deployed
# or the repo is cloned elsewhere. The dataset and models ship alongside
# app.py in this repo, so relative paths are the portable fix.
DATASET_PATH = "datasets/housePrice.csv"

# Pipelines evaluated by the app. Only models actually present in the
# repo's models/ directory are listed: the original also referenced a
# CatBoostRegressor pipeline that was never uploaded, so joblib.load
# failed on every prediction.
MODELS_PATH = [
    "models/KernelRidge_pipeline.joblib",
    # "models/GradientBoostingRegressor_pipeline.joblib",  # kept disabled, as in the original
    "models/XGBoostRegressor_pipeline.joblib",
]
|
15 |
+
|
16 |
+
# Load the cleaned data (data_cleaning caches the cleaned CSV on disk,
# so repeated Streamlit reruns are cheap after the first one).
df = data_cleaning(DATASET_PATH)

# Prepare data for input fields.
# Cast the area bounds to plain Python ints: df['Area'] yields numpy
# scalars, and st.number_input raises a type-mismatch error when
# min_value/max_value/value/step are not all the same Python numeric type
# (step below is the int 10).
min_area, max_area = int(df['Area'].min()), int(df['Area'].max())
rooms = df['Room'].unique().tolist()
addresses = df['Address'].unique().tolist()

# Streamlit app layout
st.title("🏠 Tehran House Price Prediction")
st.sidebar.header("Input Parameters")

# Sidebar input widgets
st.sidebar.subheader("Enter the details:")
area = st.sidebar.number_input("Area (m²)", min_value=min_area, max_value=max_area, value=min_area, step=10)
room = st.sidebar.selectbox("Room", options=rooms)
parking = st.sidebar.checkbox("Parking", value=True)
warehouse = st.sidebar.checkbox("Warehouse", value=True)
elevator = st.sidebar.checkbox("Elevator", value=True)
address = st.sidebar.selectbox("Address", options=addresses)

# Single-row frame with the same feature columns as the training data
# (df minus 'Price'), which is the input layout the pipelines expect.
sample = pd.DataFrame({
    'Area': [area],
    'Room': [room],
    'Parking': [parking],
    'Warehouse': [warehouse],
    'Elevator': [elevator],
    'Address': [address]
})
|
46 |
+
|
47 |
+
def load_and_predict(sample):
    """Load each pipeline in MODELS_PATH, score it on the cleaned dataset,
    and predict the price of `sample`.

    Parameters
    ----------
    sample : pd.DataFrame
        Single-row frame with the same feature columns as the training data.

    Returns
    -------
    pd.DataFrame or None
        One row per usable model (name, R2, predicted price in IRR),
        sorted by R2 descending; None when no model could be loaded.
    """
    result = {
        'Model': [],
        'R2': [],
        'Predicted_Price_(IRR)': []
    }

    # Define features and target variable.
    # NOTE(review): R2 here is computed on the same data the models were
    # trained on, so it is an optimistic (training) score — TODO confirm
    # whether a held-out split is available.
    X = df.drop(columns=['Price'])  # Features
    y = df['Price']

    for path in MODELS_PATH:
        model_name = path.split('/')[-1].split('_')[0]
        # A missing or broken model file should not abort the whole app:
        # warn and keep evaluating the remaining models. (The original
        # returned None on the first failure, hiding the working models.)
        try:
            model = joblib.load(path)  # Load the model once
            y_pred = model.predict(X)
            price_pred = model.predict(sample)[0]
        except Exception as e:
            st.warning(f"Skipping {model_name}: {str(e)}")
            continue

        result['Model'].append(model_name)
        result['R2'].append(r2_score(y, y_pred))
        result['Predicted_Price_(IRR)'].append(price_pred)

    if not result['Model']:
        st.error("An error occurred during model loading or prediction: no model could be loaded.")
        return None
    return pd.DataFrame(result).sort_values(by=['R2'], ascending=False)
|
75 |
+
|
76 |
+
# Predict button.
# Streamlit re-executes the whole script on every widget interaction, so
# gating the (expensive) model loading/prediction on the button click keeps
# ordinary input changes cheap.
if st.sidebar.button("Predict"):
    result_df = load_and_predict(sample)

    # load_and_predict returns None on failure (and has already rendered an
    # error message itself), so only display results on success.
    if result_df is not None:
        st.success('Predicted House Price:')
        st.table(result_df)

# Footer or additional information
st.sidebar.markdown("### About this App")
st.sidebar.markdown(
    "This app predicts house prices based on input features such as area, number of rooms, "
    "and facilities like parking, warehouse, and elevator. Please fill in all fields to get the prediction."
)
|
datasets/cleaned_housePrice.csv
ADDED
The diff for this file is too large to render.
See raw diff
|
|
datasets/housePrice.csv
ADDED
The diff for this file is too large to render.
See raw diff
|
|
models/GradientBoostingRegressor_pipeline.joblib
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3b4036d414847c75c050430594d81a1e429dd2c6211fa6b2645902856a462a6b
|
3 |
+
size 3176595
|
models/KernelRidge_pipeline.joblib
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:50686c1e622bbf7c4b9c7750673ebfb3a48f3048589979defcdc20602f7655ed
|
3 |
+
size 187296
|
models/XGBoostRegressor_pipeline.joblib
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bf25ca293e9d2eed0fd9a997bcd15a94556c77b6e70b441e7f261f16a8f5730f
|
3 |
+
size 1174097
|
models/tehran_house_price_preprocessor.joblib
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1f7f26dd9a0ec6db4d9fd4e039f5171ebad96c666d2c1ed34c87949ebb1a2db9
|
3 |
+
size 6273
|
requirements.txt
ADDED
Binary file (144 Bytes). View file
|
|
utils.py
ADDED
@@ -0,0 +1,83 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import re, os
|
2 |
+
from bs4 import BeautifulSoup
|
3 |
+
import pandas as pd
|
4 |
+
import requests
|
5 |
+
|
6 |
+
def convert_to_persian_numbers(text: str) -> str:
    """Convert Western (ASCII) digits in *text* to Persian digits.

    Non-digit characters are left untouched.

    Parameters
    ----------
    text : str
        Input string, possibly containing ASCII digits 0-9.

    Returns
    -------
    str
        The same string with every ASCII digit replaced by its Persian
        counterpart (۰-۹).
    """
    # str.translate performs the whole substitution in a single C-level
    # pass instead of ten chained str.replace calls.
    return text.translate(str.maketrans('0123456789', '۰۱۲۳۴۵۶۷۸۹'))
|
26 |
+
|
27 |
+
def get_USD_to_IR() -> float:
    """Scrape the current USD -> Iranian Rial rate from tgju.org.

    Returns
    -------
    float
        Rials per USD. Falls back to 300000.0 (the historical reference
        rate of 30,000 Toman/USD used elsewhere in this module) whenever
        the page cannot be fetched or parsed.
    """
    url = 'https://www.tgju.org/profile/price_dollar_rl'
    fallback = 300000.0
    try:
        # A timeout keeps the caller (a Streamlit app) from hanging
        # indefinitely when the site is unreachable; the original had none
        # and let connection errors propagate uncaught.
        response = requests.get(url, timeout=10)
    except requests.RequestException as e:
        print(f'> Error in fetching {url}: {e}.')
        return fallback
    if response.status_code != 200:
        print(f'> Error in fetching {url}: {response.status_code}.')
        return fallback
    soup = BeautifulSoup(response.text, 'html.parser')
    node = soup.find('span', {'data-col': 'info.last_trade.PDrCotVal'})
    if node is None:
        # Page layout changed: the price element is gone — do not crash
        # with an AttributeError as the original would have.
        print(f'> Error in parsing {url}: price element not found.')
        return fallback
    per_usd = node.text.replace(',', '')
    # float() accepts Unicode decimal digits, so the value parses whether
    # the site serves ASCII or Persian numerals after conversion.
    return float(convert_to_persian_numbers(per_usd))
|
36 |
+
|
37 |
+
def remove_outliers_iqr(data: pd.DataFrame, column_name: str, threshold: float = 1.5) -> pd.DataFrame:
    """Drop rows of *data* whose *column_name* value falls outside the
    Tukey fences: [Q1 - threshold*IQR, Q3 + threshold*IQR]."""
    col = data[column_name]
    first_quartile = col.quantile(0.25)
    third_quartile = col.quantile(0.75)
    spread = third_quartile - first_quartile
    lower_fence = first_quartile - threshold * spread
    upper_fence = third_quartile + threshold * spread
    # Rows flagged True are outliers; keep the complement. NaN values are
    # never flagged by either comparison, so they survive the filter —
    # matching the original mask-negation semantics.
    is_outlier = (col < lower_fence) | (col > upper_fence)
    return data[~is_outlier]
|
43 |
+
|
44 |
+
def data_cleaning(path: str) -> pd.DataFrame:
    """Load the raw Tehran housing CSV at *path*, clean it and return it.

    The cleaned frame is cached next to the raw file as
    ``cleaned_housePrice.csv``; subsequent calls load the cache directly.

    Parameters
    ----------
    path : str
        Path (with '/' separators) to the raw housePrice.csv file.

    Returns
    -------
    pd.DataFrame
        Cleaned dataset: numeric 'Area', rial-denominated 'Price',
        no NaNs, no duplicates, IQR outliers removed.
    """
    # Step 1: reuse the cached cleaned file when it exists.
    cleaned_data_path = path.rsplit('/', 1)[0] + '/cleaned_housePrice.csv'
    if os.path.exists(cleaned_data_path):
        return pd.read_csv(cleaned_data_path)

    df = pd.read_csv(path)

    # Update Price: the CSV prices correspond to a historical rate of
    # 300,000 Rial (30,000 Toman) per USD; rescale them to today's rate.
    # The trailing *10 converts Toman to Rial.
    today_usd = get_USD_to_IR()
    correct_coeff = today_usd / 300000
    df.Price = df.Price * correct_coeff * 10

    # Drop the redundant USD price column.
    df = df.drop(['Price(USD)'], axis=1)

    # Step 2: data cleaning.
    # Coerce 'Area' to numeric, stripping any non-digit characters first.
    df.Area = df.Area.apply(lambda x: re.sub(r'\D', '', str(x)))
    df.Area = pd.to_numeric(df.Area, errors='coerce')

    # Handle missing values and duplicates.
    df.dropna(ignore_index=True, inplace=True)
    df = df.drop_duplicates(ignore_index=True)

    # Handle outliers.
    df = remove_outliers_iqr(df, 'Price')
    df = remove_outliers_iqr(df, 'Area')
    df.reset_index(drop=True, inplace=True)

    # Save the cleaned dataset to the exact path checked above.
    # BUG FIX: the original concatenated directory + filename without a
    # '/' (e.g. 'datasetscleaned_housePrice.csv'), writing the cache to the
    # wrong location so the os.path.exists check above never hit.
    df.to_csv(cleaned_data_path, index=False)

    return df
|
83 |
+
|