AfshinMA committed
Commit e88750c · verified · Parent: 1dda957

Upload 9 files

app.py ADDED
@@ -0,0 +1,89 @@
+ import streamlit as st
+ import pandas as pd
+ import joblib
+ from sklearn.metrics import r2_score
+ from utils import data_cleaning
+
+ # Dataset and model paths
+ DATASET_PATH = "C:/Users/Afshin/Desktop/10_Projects/Project_1_Tehran_House_Price_Prediction/datasets/housePrice.csv"
+ MODELS_PATH = [
+     "C:/Users/Afshin/Desktop/10_Projects/Project_1_Tehran_House_Price_Prediction/models/KernelRidge_pipeline.joblib",
+     # "C:/Users/Afshin/Desktop/10_Projects/Project_1_Tehran_House_Price_Prediction/models/GradientBoostingRegressor_pipeline.joblib",
+     "C:/Users/Afshin/Desktop/10_Projects/Project_1_Tehran_House_Price_Prediction/models/XGBoostRegressor_pipeline.joblib",
+     "C:/Users/Afshin/Desktop/10_Projects/Project_1_Tehran_House_Price_Prediction/models/CatBoostRegressor_pipeline.joblib",
+ ]
+
+ # Load the cleaned data
+ df = data_cleaning(DATASET_PATH)
+
+ # Prepare data for the input fields (cast to plain int so all number_input arguments share one numeric type)
+ min_area, max_area = int(df['Area'].min()), int(df['Area'].max())
+ rooms = df['Room'].unique().tolist()
+ addresses = df['Address'].unique().tolist()
+
+ # Streamlit app layout
+ st.title("🏠 Tehran House Price Prediction")
+ st.sidebar.header("Input Parameters")
+
+ # Sidebar input fields
+ st.sidebar.subheader("Enter the details:")
+ area = st.sidebar.number_input("Area (m²)", min_value=min_area, max_value=max_area, value=min_area, step=10)
+ room = st.sidebar.selectbox("Room", options=rooms)
+ parking = st.sidebar.checkbox("Parking", value=True)
+ warehouse = st.sidebar.checkbox("Warehouse", value=True)
+ elevator = st.sidebar.checkbox("Elevator", value=True)
+ address = st.sidebar.selectbox("Address", options=addresses)
+
+ # Prepare input data for prediction
+ sample = pd.DataFrame({
+     'Area': [area],
+     'Room': [room],
+     'Parking': [parking],
+     'Warehouse': [warehouse],
+     'Elevator': [elevator],
+     'Address': [address]
+ })
+
+ def load_and_predict(sample):
+     result = {
+         'Model': [],
+         'R2': [],
+         'Predicted_Price_(IRR)': []
+     }
+
+     # Define features and target variable
+     X = df.drop(columns=['Price'])  # Features
+     y = df['Price']                 # Target
+
+     try:
+         for path in MODELS_PATH:
+             model_name = path.split('/')[-1].split('_')[0]
+             model = joblib.load(path)  # Load each model once
+
+             # Score the model on the full dataset and predict the price of the input sample
+             y_pred = model.predict(X)
+             price_pred = model.predict(sample)[0]
+
+             result['Model'].append(model_name)
+             result['R2'].append(r2_score(y, y_pred))
+             result['Predicted_Price_(IRR)'].append(price_pred)
+
+     except Exception as e:
+         st.error(f"An error occurred during model loading or prediction: {str(e)}")
+         return None
+     return pd.DataFrame(result).sort_values(by=['R2'], ascending=False)
+
+ # Predict button
+ if st.sidebar.button("Predict"):
+     result_df = load_and_predict(sample)
+
+     if result_df is not None:
+         st.success('Predicted House Price:')
+         st.table(result_df)
+
+ # Footer / additional information
+ st.sidebar.markdown("### About this App")
+ st.sidebar.markdown(
+     "This app predicts house prices based on input features such as area, number of rooms, "
+     "and facilities like parking, warehouse, and elevator. Please fill in all fields to get the prediction."
+ )
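
For a quick check outside Streamlit, the prediction path above reduces to loading a saved pipeline with joblib and calling predict on a one-row DataFrame. A minimal sketch, assuming the repo-relative models/KernelRidge_pipeline.joblib path from this commit, that the pipeline handles its own preprocessing, and a hypothetical 'Punak' address value:

import joblib
import pandas as pd

# Load one of the committed pipelines (repo-relative path is an assumption; app.py uses absolute paths)
pipeline = joblib.load("models/KernelRidge_pipeline.joblib")

# One-row input with the same columns app.py builds from the sidebar
sample = pd.DataFrame({
    'Area': [100],
    'Room': [2],
    'Parking': [True],
    'Warehouse': [True],
    'Elevator': [True],
    'Address': ['Punak'],  # hypothetical value; must be an address present in the training data
})

print(f"Predicted price (IRR): {pipeline.predict(sample)[0]:,.0f}")
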
datasets/cleaned_housePrice.csv ADDED
The diff for this file is too large to render. See raw diff
 
datasets/housePrice.csv ADDED
The diff for this file is too large to render. See raw diff
 
models/GradientBoostingRegressor_pipeline.joblib ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3b4036d414847c75c050430594d81a1e429dd2c6211fa6b2645902856a462a6b
+ size 3176595
models/KernelRidge_pipeline.joblib ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:50686c1e622bbf7c4b9c7750673ebfb3a48f3048589979defcdc20602f7655ed
+ size 187296
models/XGBoostRegressor_pipeline.joblib ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:bf25ca293e9d2eed0fd9a997bcd15a94556c77b6e70b441e7f261f16a8f5730f
+ size 1174097
models/tehran_house_price_preprocessor.joblib ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1f7f26dd9a0ec6db4d9fd4e039f5171ebad96c666d2c1ed34c87949ebb1a2db9
+ size 6273
requirements.txt ADDED
Binary file (144 Bytes).
 
utils.py ADDED
@@ -0,0 +1,83 @@
+ import re, os
+ from bs4 import BeautifulSoup
+ import pandas as pd
+ import requests
+
+ def convert_to_persian_numbers(text: str) -> str:
+     """Convert English digits in a string to Persian digits."""
+     english_to_persian = {
+         '0': '۰',
+         '1': '۱',
+         '2': '۲',
+         '3': '۳',
+         '4': '۴',
+         '5': '۵',
+         '6': '۶',
+         '7': '۷',
+         '8': '۸',
+         '9': '۹'
+     }
+
+     # Replace English digits with Persian digits
+     for eng_digit, persian_digit in english_to_persian.items():
+         text = text.replace(eng_digit, persian_digit)
+
+     return text
+
+ def get_USD_to_IR() -> float:
+     """Fetch the current USD price in Iranian Rials; fall back to 300,000 IRR on failure."""
+     url = 'https://www.tgju.org/profile/price_dollar_rl'
+     response = requests.get(url)
+     if response.status_code != 200:
+         print(f'> Error in fetching {url}: {response.status_code}.')
+         return 300000
+     soup = BeautifulSoup(response.text, 'html.parser')
+     per_usd = soup.find('span', {'data-col': 'info.last_trade.PDrCotVal'}).text.replace(',', '')
+     # float() accepts Unicode decimal digits, so the value parses whether its digits are English or Persian
+     return float(convert_to_persian_numbers(per_usd))
+
+ def remove_outliers_iqr(data: pd.DataFrame, column_name: str, threshold: float = 1.5) -> pd.DataFrame:
+     """Remove outliers using the IQR method."""
+     Q1 = data[column_name].quantile(0.25)
+     Q3 = data[column_name].quantile(0.75)
+     IQR = Q3 - Q1
+     return data[~((data[column_name] < (Q1 - threshold * IQR)) | (data[column_name] > (Q3 + threshold * IQR)))]
+
+ def data_cleaning(path: str) -> pd.DataFrame:
+     # Step 1 : Load and prepare data
+     # I : Reuse the cached cleaned file if it already exists; otherwise load the raw dataset
+     cleaned_data_path = path.rsplit('/', 1)[0] + '/cleaned_housePrice.csv'
+     if os.path.exists(cleaned_data_path):
+         return pd.read_csv(cleaned_data_path)
+     else:
+         df = pd.read_csv(path)
+
+     # II : Update the **Price** column
+     # Dataset prices assume 1 USD = 30,000 Tomans (300,000 Rials);
+     # rescale to today's rate and multiply by 10 to convert Tomans to Rials
+     today_usd = get_USD_to_IR()
+     correct_coeff = today_usd / 300000
+     df.Price = df.Price.apply(lambda x: x * correct_coeff * 10)
+
+     # III : Drop unneeded columns
+     df = df.drop(['Price(USD)'], axis=1)
+
+     # Step 2 : Data cleaning
+     # I : Correct the datatype of columns
+     df.Area = df.Area.apply(lambda x: re.sub(r'\D', '', str(x)))
+     df.Area = pd.to_numeric(df.Area, errors='coerce')
+
+     # II : Handle **missing values** by dropping null rows
+     df.dropna(ignore_index=True, inplace=True)
+
+     # III : Handle **duplicates**
+     df = df.drop_duplicates(ignore_index=True)
+
+     # IV : Handle **outliers**
+     df = remove_outliers_iqr(df, 'Price')
+     df = remove_outliers_iqr(df, 'Area')
+     df.reset_index(drop=True, inplace=True)
+
+     # V : Save the cleaned dataset next to the raw CSV
+     df.to_csv(cleaned_data_path, index=False)
+
+     return df
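
As a usage note, data_cleaning caches its output next to the raw CSV, so only the first call scrapes the USD rate and runs the full pipeline; later calls read the cached file. A minimal sketch, assuming the repo-relative datasets/housePrice.csv path from this commit:

from utils import data_cleaning, remove_outliers_iqr

# First call cleans datasets/housePrice.csv and writes datasets/cleaned_housePrice.csv;
# subsequent calls return the cached file directly.
df = data_cleaning("datasets/housePrice.csv")
print(df.shape)

# remove_outliers_iqr can also be reused on any numeric column with a custom IQR threshold.
trimmed = remove_outliers_iqr(df, 'Area', threshold=3.0)
print(len(df) - len(trimmed), "rows flagged as Area outliers at the looser threshold")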