ratneshpasi03 committed on
Commit
707521a
·
1 Parent(s): f953f57

Update questions data

Browse files
data/questions/3/answer.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ Winter Average PM2.5: 67.4923443634478
2
+ Monsoon Average PM2.5: 34.42519611317571
3
+ Summer Average PM2.5: nan
data/questions/3/code.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
def true_code():
    """Print the 2018 seasonal mean PM2.5 for one Kalaburagi station.

    Loads ``data/raw_data/Data.csv``, restricts it to rows whose
    ``Timestamp`` year is 2018 and whose ``station`` is the Lal Bahadur
    Shastri Nagar (Kalaburagi, KSPCB) monitor, and prints the mean of the
    ``PM2.5`` column for three month groups, in this order:

    * Winter  - months 12, 1 and 2 (all taken from calendar year 2018)
    * Monsoon - months 6 through 9
    * Summer  - months 3 through 5

    Nothing is returned; the three averages are written to stdout.
    """
    import pandas as pd

    readings = pd.read_csv('data/raw_data/Data.csv', sep=",")

    # Derive the calendar parts once instead of storing them as columns.
    stamps = pd.to_datetime(readings['Timestamp'])
    month = stamps.dt.month

    # Rows for the requested year and monitoring station.
    wanted = (stamps.dt.year == 2018) & (
        readings['station'] == 'Lal Bahadur Shastri Nagar, Kalaburagi - KSPCB'
    )

    # (label, month mask) pairs, listed in the order they are printed.
    seasons = (
        ("Winter Average PM2.5:", month.isin([12, 1, 2])),
        ("Monsoon Average PM2.5:", month.between(6, 9)),
        ("Summer Average PM2.5:", month.between(3, 5)),
    )

    # Compute every average before printing anything.
    averages = [
        (label, readings.loc[wanted & in_season, 'PM2.5'].mean())
        for label, in_season in seasons
    ]

    for label, value in averages:
        print(label, value)


true_code()
data/questions/3/metadata.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "question_id": 3,
3
+ "category": "temporal",
4
+ "answer_category": "multiple",
5
+ "plot": false,
6
+ "libraries": [
7
+ "pandas"
8
+ ]
9
+ }
data/questions/3/question.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ For the year 2018, calculate the average PM2.5 concentration for each season (Winter: December-February, Summer: March-May, and Monsoon: June-September) for station Lal Bahadur Shastri Nagar, Kalaburagi - KSPCB. Identify the season with the highest pollution and suggest potential factors contributing to the increase.
output.jsonl CHANGED
@@ -1,3 +1 @@
1
- {"folder": "0", "question": "Which state has the highest average PM2.5 concentration across all stations?", "answer": "Delhi", "code": "def true_code():\n import pandas as pd\n \n df = pd.read_csv('data/raw_data/Data.csv', sep=\",\")\n \n data = df.groupby(['state','station'])['PM2.5'].mean()\n ans = data.idxmax()[0]\n print(ans)\n\ntrue_code()", "metadata": {"question_id": 0, "category": "spatial", "answer_category": "single", "plot": false, "libraries": ["pandas"]}}
2
- {"folder": "1", "question": "Report the station that recorded the highest value of PM 2.5 for the month Aug of 2020", "answer": "Lal Bahadur Shastri Nagar, Kalaburagi ", "code": "def true_code():\n import pandas as pd\n \n df = pd.read_csv('data/raw_data/Data.csv', sep=\",\")\n \n df['Timestamp'] = pd.to_datetime(df['Timestamp'])\n \n df['Year'] = df['Timestamp'].dt.year\n df['Month'] = df['Timestamp'].dt.month\n data = df[(df['Year'] == 2020) & (df['Month'] == 8)]\n ans = data.groupby('station')['PM2.5'].max().idxmax()\n print(ans)\n\ntrue_code()", "metadata": {"question_id": 2, "category": "temporal", "answer_category": "double", "plot": false, "libraries": ["pandas"]}}
3
- {"folder": "2", "question": "Which state had the most days with hazardous PM2.5 levels (above 300 µg/m³)?", "answer": "Andhra Pradesh", "code": "def true_code():\n import pandas as pd\n \n df = pd.read_csv('data/raw_data/Data.csv', sep=\",\")\n \n data = df[df['PM2.5'] > 300]\n ans = data.groupby(['state', 'station']).value_counts().idxmax()[0]\n print(ans)\n\ntrue_code()", "metadata": {"question_id": 2, "category": "spatial", "answer_category": "single", "plot": false, "libraries": ["pandas"]}}
 
1
+ {"folder": 3, "question": "For the year 2018, calculate the average PM2.5 concentration for each season (Winter: December-February, Summer: March-May, and Monsoon: June-September) for station Lal Bahadur Shastri Nagar, Kalaburagi - KSPCB. Identify the season with the highest pollution and suggest potential factors contributing to the increase.", "answer": "Winter Average PM2.5: 67.4923443634478\nMonsoon Average PM2.5: 34.42519611317571\nSummer Average PM2.5: nan", "code": "def true_code():\n import pandas as pd\n \n df = pd.read_csv('data/raw_data/Data.csv', sep=\",\")\n \n df['Timestamp'] = pd.to_datetime(df['Timestamp'])\n df['Year'] = df['Timestamp'].dt.year\n df['Month'] = df['Timestamp'].dt.month\n \n data = df[df['Year'] == 2018]\n data = data[data['station'] == 'Lal Bahadur Shastri Nagar, Kalaburagi - KSPCB']\n \n winter_data = data[(data['Month'] == 12) | (data['Month'] <= 2)]\n summer_data = data[(data['Month'] >= 3) & (data['Month'] <= 5)]\n monsoon_data = data[(data['Month'] >= 6) & (data['Month'] <= 9)]\n \n summer_avg = summer_data['PM2.5'].mean()\n winter_avg = winter_data['PM2.5'].mean()\n monsoon_avg = monsoon_data['PM2.5'].mean()\n \n print(\"Winter Average PM2.5:\", winter_avg)\n print(\"Monsoon Average PM2.5:\", monsoon_avg)\n print(\"Summer Average PM2.5:\", summer_avg)\n\ntrue_code()", "metadata": {"question_id": 3, "category": "temporal", "answer_category": "multiple", "plot": false, "libraries": ["pandas"]}}