burtenshaw committed on
Commit
6f46aeb
·
0 Parent(s):

first commit

Browse files
Files changed (6) hide show
  1. .python-version +1 -0
  2. README.md +17 -0
  3. app.py +290 -0
  4. example.json +32 -0
  5. pyproject.toml +12 -0
  6. uv.lock +0 -0
.python-version ADDED
@@ -0,0 +1 @@
 
 
1
+ 3.11
README.md ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Dataset Quiz
3
+ emoji: 🔥
4
+ colorFrom: pink
5
+ colorTo: blue
6
+ sdk: gradio
7
+ sdk_version: 5.13.1
8
+ app_file: app.py
9
+ pinned: false
10
+ license: apache-2.0
11
+ short_description: A quiz app for rows of a dataset
12
+ hf_oauth: true
13
+ ---
14
+
15
+ # Dataset Quiz
16
+
17
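+ A Gradio quiz app that presents coding challenges from a Hugging Face dataset, grades each submission with an LLM, and lets logged-in users push their results to the Hub.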
app.py ADDED
@@ -0,0 +1,290 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from datetime import datetime
3
+ import random
4
+
5
+ import gradio as gr
6
+ from datasets import load_dataset, Dataset, DatasetDict
7
+ from huggingface_hub import whoami, InferenceClient
8
+
9
# Inference client used by check_code to grade submissions with an LLM.
client = InferenceClient(
    api_key=os.getenv("HF_API_KEY"),  # Make sure to set this environment variable
)

# Quiz configuration. Values taken from the environment are strings, so
# consumers convert them with int()/float() at the point of use.
EXAM_MAX_QUESTIONS = os.getenv("EXAM_MAX_QUESTIONS") or 5  # We have 5 questions total
EXAM_PASSING_SCORE = os.getenv("EXAM_PASSING_SCORE") or 0.0
EXAM_DATASET_ID = "agents-course/dummy-code-quiz"

# Load the questions from the Hugging Face dataset and randomize their order.
ds = load_dataset(EXAM_DATASET_ID, split="train")
quiz_data = ds.to_list()
random.shuffle(quiz_data)

# Cap the quiz at EXAM_MAX_QUESTIONS so the setting actually takes effect.
# A non-positive limit leaves the quiz uncapped instead of emptying it.
_max_questions = int(EXAM_MAX_QUESTIONS)
if _max_questions > 0:
    quiz_data = quiz_data[:_max_questions]
23
+
24
def check_code(user_code, solution, challenge):
    """
    Use an LLM to evaluate if the user's code solution is correct.

    Args:
        user_code: The code submitted by the user.
        solution: The reference solution for the challenge.
        challenge: The challenge description shown to the user.

    Returns:
        True if the model's verdict is CORRECT, False otherwise. If the LLM
        call fails, falls back to an exact (whitespace-stripped) comparison
        of the submission against the reference solution.
    """
    prompt = f"""You are an expert Python programming instructor evaluating a student's code solution.

Challenge:
{challenge}

Reference Solution:
{solution}

Student's Solution:
{user_code}

Evaluate if the student's solution is functionally equivalent to the reference solution.
Consider:
1. Does it solve the problem correctly?
2. Does it handle edge cases appropriately?
3. Does it follow the requirements of the challenge?

Respond with ONLY "CORRECT" or "INCORRECT" followed by a brief explanation.
"""

    messages = [{"role": "user", "content": prompt}]

    try:
        completion = client.chat.completions.create(
            model="Qwen/Qwen2.5-Coder-32B-Instruct",
            messages=messages,
            max_tokens=500,
        )

        response = completion.choices[0].message.content.strip()

        # The verdict may be wrapped in markdown emphasis or quotes
        # (e.g. **CORRECT** or "CORRECT"); ignore that decoration.
        verdict_text = response.lstrip("*_\"'`# \t\r\n")
        if verdict_text[:9].upper() == "INCORRECT":
            is_correct, remainder = False, verdict_text[9:]
        elif verdict_text[:7].upper() == "CORRECT":
            is_correct, remainder = True, verdict_text[7:]
        else:
            # No recognizable verdict: treat as incorrect and show the reply.
            is_correct, remainder = False, response

        # The explanation is whatever follows the verdict, whether on the same
        # line or on the following lines. Skip the toast when there is none.
        explanation = remainder.lstrip("*_\"'`.:;,!- \t\r\n").strip()
        if explanation:
            gr.Info(explanation)

        return is_correct

    except Exception as e:
        # Deliberate best-effort boundary: any failure talking to the model
        # must not break the quiz flow.
        gr.Warning(f"Error checking code: {str(e)}")
        # Fall back to simple string comparison if LLM fails
        return user_code.strip() == solution.strip()
73
+
74
+
75
def on_user_logged_in(token: gr.OAuthToken | None):
    """Return visibility updates for the login and start buttons.

    With a token present the first update hides its target and the second
    shows its target; without a token the visibilities are reversed.
    """
    logged_in = token is not None
    login_update = gr.update(visible=not logged_in)
    start_update = gr.update(visible=logged_in)
    return login_update, start_update
81
+
82
+
83
def push_results_to_hub(
    user_answers: list, token: gr.OAuthToken | None, signed_in_message: str
):
    """Push the user's quiz results to the Hugging Face Hub.

    Args:
        user_answers: Answer records; each must contain an "is_correct" key.
        token: OAuth token of the logged-in user, or None when not logged in.
            NOTE(review): the click wiring passes only two inputs, so this is
            presumably injected by Gradio from the annotation - confirm.
        signed_in_message: Value of the login button; only logged.

    Returns:
        A status message describing the outcome (nothing to submit, not
        logged in, below passing score, or submitted with the final grade).
    """
    import re  # local import: only needed to build a valid split name

    print(f"signed_in_message: {signed_in_message}")

    if not user_answers:  # Check if there are any answers to submit
        gr.Warning("No answers to submit!")
        return "No answers to submit!"

    if token is None:
        gr.Warning("Please log in to Hugging Face before pushing!")
        return "Please log in to Hugging Face before pushing!"

    # Calculate grade
    correct_count = sum(1 for answer in user_answers if answer["is_correct"])
    total_questions = len(user_answers)
    grade = correct_count / total_questions if total_questions > 0 else 0

    passing_score = float(EXAM_PASSING_SCORE)
    if grade < passing_score:
        gr.Warning(
            f"Score {grade:.1%} below passing threshold of {passing_score:.1%}"
        )
        return f"You scored {grade:.1%}. Please try again to achieve at least {passing_score:.1%}"

    gr.Info("Submitting answers to the Hub. Please wait...", duration=2)

    user_info = whoami(token=token.token)
    username = user_info["name"]
    repo_id = f"{EXAM_DATASET_ID}_responses"
    submission_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    # Create a dataset with the user's answers and metadata
    submission_data = [
        {
            "username": username,
            "datetime": submission_time,
            "grade": grade,
            **answer,  # Include all answer data
        }
        for answer in user_answers
    ]

    try:
        # Try to load existing dataset
        existing_ds = load_dataset(repo_id)
        # Convert to DatasetDict if it isn't already
        if not isinstance(existing_ds, dict):
            existing_ds = DatasetDict({"default": existing_ds})
    except Exception:
        # If dataset doesn't exist, create empty DatasetDict.
        # NOTE(review): this also swallows auth/network failures, in which
        # case the push below starts from an empty dict - worth narrowing.
        existing_ds = DatasetDict()

    # Create new dataset from submission
    new_ds = Dataset.from_list(submission_data)

    # Split names must match ^\w+(\.\w+)*$, but Hub usernames may contain
    # characters such as hyphens. Replace anything outside \w so the push does
    # not fail; the exact username is still stored in every row.
    split_name = re.sub(r"\W", "_", username)

    # Add or update the split for this user
    existing_ds[split_name] = new_ds

    # Push the updated dataset to the Hub
    existing_ds.push_to_hub(
        repo_id,
        private=True,  # Make it private by default since it contains student submissions
    )

    return f"Your responses have been submitted to the Hub! Final grade: {grade:.1%}"
150
+
151
+
152
def handle_quiz(question_idx, user_answers, submitted_code, is_start):
    """Handle quiz state and progression.

    Args:
        question_idx: Index of the question currently shown.
        user_answers: List of answer records collected so far; graded
            submissions are appended to it in place.
        submitted_code: Code currently in the editor (may be None or empty).
        is_start: True when triggered by the start button, False for "next".

    Returns:
        A 7-tuple: question text, code-input update, status text, new
        question index, the answers list, start-button update, and the
        final-results markdown update.
    """
    # Hide the start button once the first question is shown
    start_btn_update = gr.update(visible=False) if is_start else None

    if is_start:
        # Starting the quiz always begins at the first question.
        question_idx = 0
    else:
        # The editor value can be None when nothing was ever entered, so
        # normalize before stripping instead of raising AttributeError.
        code = submitted_code or ""
        if question_idx < len(quiz_data) and code.strip():
            # Only grade and record when there is actual code.
            current_q = quiz_data[question_idx]
            is_correct = check_code(
                code, current_q["solution"], current_q["challenge"]
            )
            user_answers.append(
                {
                    "challenge": current_q["challenge"],
                    "submitted_code": code,
                    "correct_solution": current_q["solution"],
                    "is_correct": is_correct,
                }
            )
        question_idx += 1

    # If we've reached the end, show final results
    if question_idx >= len(quiz_data):
        correct_count = sum(1 for answer in user_answers if answer["is_correct"])
        # Every question may have been skipped; avoid dividing by zero and
        # report a grade of 0 in that case.
        grade = correct_count / len(user_answers) if user_answers else 0.0
        passing_score = float(EXAM_PASSING_SCORE)

        parts = [
            f"**Quiz Complete!**\n\n"
            f"Your score: {grade:.1%}\n"
            f"Passing score: {passing_score:.1%}\n\n"
            f"Your answers:\n\n"
        ]
        for idx, answer in enumerate(user_answers):
            parts.append(
                f"Question {idx + 1}: {'✅' if answer['is_correct'] else '❌'}\n"
            )
            parts.append(
                f"Your code:\n```python\n{answer['submitted_code']}\n```\n\n"
            )
        results_text = "".join(parts)

        return (
            "",  # question_text becomes blank
            gr.update(value="", visible=False),  # clear and hide code input
            f"{'✅ Passed!' if grade >= passing_score else '❌ Did not pass'}",
            question_idx,
            user_answers,
            start_btn_update,
            gr.update(value=results_text, visible=True),  # show final_markdown
        )

    # Otherwise show the next question
    q = quiz_data[question_idx]
    challenge_text = f"## Question {question_idx + 1} \n### {q['challenge']}"
    return (
        challenge_text,
        gr.update(value=q["placeholder"], visible=True),
        "Submit your code solution and click 'Next' to continue.",
        question_idx,
        user_answers,
        start_btn_update,
        gr.update(visible=False),  # Hide final_markdown
    )
219
+
220
+
221
# Build the quiz UI and wire its events.
with gr.Blocks() as demo:
    demo.title = f"Coding Quiz: {EXAM_DATASET_ID}"

    # Quiz state: current question index and collected answers.
    question_idx = gr.State(value=0)
    user_answers = gr.State(value=[])

    # Header and instructions.
    with gr.Row(variant="compact"):
        gr.Markdown(f"## Welcome to the {EXAM_DATASET_ID} Quiz")
    with gr.Row(variant="compact"):
        gr.Markdown(
            "Log in first, then click 'Start' to begin. Complete each coding challenge, click 'Next', "
            "and finally click 'Submit' to publish your results to the Hugging Face Hub."
        )

    # Question display and code editor (editor hidden until the quiz starts).
    with gr.Row(variant="panel"):
        question_text = gr.Markdown("")
        code_input = gr.Code(language="python", label="Your Solution", visible=False)

    with gr.Row(variant="compact"):
        status_text = gr.Markdown("")

    with gr.Row(variant="compact"):
        final_markdown = gr.Markdown("", visible=False)

    next_btn = gr.Button("Next ⏭️")
    submit_btn = gr.Button("Submit ✅")

    with gr.Row(variant="compact"):
        login_btn = gr.LoginButton()
        start_btn = gr.Button("Start", visible=False)

    login_btn.click(fn=on_user_logged_in, inputs=None, outputs=[login_btn, start_btn])

    # Start and Next drive the same handler and update the same components;
    # they differ only in the is_start flag passed as the last input.
    quiz_outputs = [
        question_text,
        code_input,
        status_text,
        question_idx,
        user_answers,
        start_btn,
        final_markdown,
    ]

    start_btn.click(
        fn=handle_quiz,
        inputs=[question_idx, user_answers, code_input, gr.State(True)],
        outputs=quiz_outputs,
    )

    next_btn.click(
        fn=handle_quiz,
        inputs=[question_idx, user_answers, code_input, gr.State(False)],
        outputs=quiz_outputs,
    )

    submit_btn.click(
        fn=push_results_to_hub,
        inputs=[user_answers, login_btn],
        outputs=status_text,
    )


if __name__ == "__main__":
    demo.launch()
example.json ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "challenge": "Complete the function to calculate the factorial of a number using recursion",
4
+ "solution": "if n <= 1:\n return 1\nreturn n * factorial(n-1)",
5
+ "placeholder": "def factorial(n):\n # TODO: Implement recursive factorial calculation\n pass",
6
+ "context": "Factorial is the product of all positive integers less than or equal to n. For example, factorial(5) = 5 * 4 * 3 * 2 * 1 = 120. Base case is n=1 or n=0 which returns 1."
7
+ },
8
+ {
9
+ "challenge": "Implement a function to reverse a string without using built-in reverse methods",
10
+ "solution": "reversed_str = ''\nfor i in range(len(s)-1, -1, -1):\n reversed_str += s[i]\nreturn reversed_str",
11
+ "placeholder": "def reverse_string(s):\n # TODO: Implement string reversal\n pass",
12
+ "context": "String reversal can be done by iterating from the end to the beginning or using slicing with a negative step. This tests understanding of string manipulation and iteration."
13
+ },
14
+ {
15
+ "challenge": "Write a list comprehension that filters out all even numbers from the input list",
16
+ "solution": "return [num for num in numbers if num % 2 != 0]",
17
+ "placeholder": "def get_odd_numbers(numbers):\n # TODO: Filter odd numbers using list comprehension\n pass",
18
+ "context": "List comprehensions provide a concise way to create lists based on existing lists. The modulo operator % is used to test for odd/even numbers."
19
+ },
20
+ {
21
+ "challenge": "Complete the function to find the first non-repeating character in a string",
22
+ "solution": "char_count = {}\nfor char in s:\n char_count[char] = char_count.get(char, 0) + 1\nfor char in s:\n if char_count[char] == 1:\n return char\nreturn None",
23
+ "placeholder": "def first_non_repeating(s):\n # TODO: Find first non-repeating character\n pass",
24
+ "context": "This problem tests dictionary usage and string iteration. The solution involves counting character frequencies and then finding the first character with count 1."
25
+ },
26
+ {
27
+ "challenge": "Implement a function that checks if a string is a valid palindrome",
28
+ "solution": "s = ''.join(char.lower() for char in s if char.isalnum())\nreturn s == s[::-1]",
29
+ "placeholder": "def is_palindrome(s):\n # TODO: Check if string is palindrome (ignoring spaces and punctuation)\n pass",
30
+ "context": "Palindrome check requires string cleaning (removing spaces/punctuation), case normalization, and comparison. String slicing with [::-1] provides an efficient way to reverse strings in Python."
31
+ }
32
+ ]
pyproject.toml ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [project]
2
+ name = "code-assignment-app"
3
+ version = "0.1.0"
4
+ description = "Add your description here"
5
+ readme = "README.md"
6
+ requires-python = ">=3.11"
7
+ dependencies = [
8
+ "datasets>=3.2.0",
9
+ "gradio[oauth]>=5.13.2",
10
+ "huggingface-hub>=0.28.0",
11
+ "ipykernel>=6.29.5",
12
+ ]
uv.lock ADDED
The diff for this file is too large to render. See raw diff