Spaces:
Running
Running
burtenshaw
committed on
Commit
·
6f46aeb
0
Parent(s):
first commit
Browse files- .python-version +1 -0
- README.md +17 -0
- app.py +290 -0
- example.json +32 -0
- pyproject.toml +12 -0
- uv.lock +0 -0
.python-version
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
3.11
|
README.md
ADDED
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
title: Dataset Quiz
|
3 |
+
emoji: 🔥
|
4 |
+
colorFrom: pink
|
5 |
+
colorTo: blue
|
6 |
+
sdk: gradio
|
7 |
+
sdk_version: 5.13.1
|
8 |
+
app_file: app.py
|
9 |
+
pinned: false
|
10 |
+
license: apache-2.0
|
11 |
+
short_description: A quiz app for rows of a dataset
|
12 |
+
hf_oauth: true
|
13 |
+
---
|
14 |
+
|
15 |
+
# Dataset Quiz
|
16 |
+
|
17 |
+
A quiz app for rows of a dataset.
|
app.py
ADDED
@@ -0,0 +1,290 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
from datetime import datetime
|
3 |
+
import random
|
4 |
+
|
5 |
+
import gradio as gr
|
6 |
+
from datasets import load_dataset, Dataset, DatasetDict
|
7 |
+
from huggingface_hub import whoami, InferenceClient
|
8 |
+
|
9 |
+
# Initialize the inference client used by check_code() to grade submissions.
client = InferenceClient(
    api_key=os.getenv("HF_API_KEY"),  # Make sure to set this environment variable
)

# Quiz configuration. Environment variables always arrive as strings, so the
# values are coerced to numbers once here; every consumer then sees the same
# type whether the default or an override is in effect. An unset or empty
# variable falls back to the default; a non-numeric value raises ValueError
# at import time instead of later, in the middle of a quiz.
# NOTE(review): EXAM_MAX_QUESTIONS is not referenced by the code visible in
# this file -- confirm whether quiz_data was meant to be capped to it.
EXAM_MAX_QUESTIONS = int(os.getenv("EXAM_MAX_QUESTIONS") or 5)
EXAM_PASSING_SCORE = float(os.getenv("EXAM_PASSING_SCORE") or 0.0)
EXAM_DATASET_ID = "agents-course/dummy-code-quiz"

# Load the questions from the Hugging Face dataset and shuffle them once at
# import time; the resulting order is shared by the whole process.
ds = load_dataset(EXAM_DATASET_ID, split="train")
quiz_data = ds.to_list()
random.shuffle(quiz_data)
|
23 |
+
|
24 |
+
def check_code(user_code, solution, challenge):
    """Ask the LLM whether ``user_code`` correctly solves ``challenge``.

    The model is given the challenge, the reference ``solution`` and the
    student's ``user_code`` and is asked to answer "CORRECT" or "INCORRECT"
    followed by a short explanation. Any explanation is surfaced to the user
    as a Gradio info toast.

    Args:
        user_code: Code submitted by the student.
        solution: Reference solution for the challenge.
        challenge: Natural-language description of the task.

    Returns:
        True if the model's verdict is "CORRECT", False otherwise. If the
        request (or anything else in the grading path) raises, a warning
        toast is shown and the result falls back to an exact, whitespace-
        stripped string comparison between ``user_code`` and ``solution``.
    """
    # NOTE(review): user_code is untrusted and is interpolated straight into
    # the prompt, so a submission can try to talk the grader into "CORRECT".
    prompt = f"""You are an expert Python programming instructor evaluating a student's code solution.

Challenge:
{challenge}

Reference Solution:
{solution}

Student's Solution:
{user_code}

Evaluate if the student's solution is functionally equivalent to the reference solution.
Consider:
1. Does it solve the problem correctly?
2. Does it handle edge cases appropriately?
3. Does it follow the requirements of the challenge?

Respond with ONLY "CORRECT" or "INCORRECT" followed by a brief explanation.
"""

    messages = [{"role": "user", "content": prompt}]

    try:
        completion = client.chat.completions.create(
            model="Qwen/Qwen2.5-Coder-32B-Instruct",
            messages=messages,
            max_tokens=500,
        )

        response = completion.choices[0].message.content.strip()

        # Models frequently echo the verdict with the quotes from the prompt
        # or with markdown emphasis ('"CORRECT"', '**CORRECT**'). Skip that
        # decoration so a right answer is not graded wrong on formatting.
        decoration = "\"'*`#_ \t\r\n"
        body = response.lstrip(decoration)
        upper_body = body.upper()

        # "INCORRECT" does not start with "CORRECT", but test it first so the
        # length of the verdict word to strip is unambiguous.
        if upper_body.startswith("INCORRECT"):
            is_correct, verdict_len = False, len("INCORRECT")
        elif upper_body.startswith("CORRECT"):
            is_correct, verdict_len = True, len("CORRECT")
        else:
            # No recognisable verdict: treat as incorrect and show the whole
            # reply so the user can see what the grader said.
            is_correct, verdict_len = False, 0

        # Everything after the verdict word is the explanation, whether it is
        # on the same line or a following one.
        explanation = body[verdict_len:].lstrip(decoration + ".:,;-").strip()
        if explanation:  # avoid popping an empty toast
            gr.Info(explanation)

        return is_correct

    except Exception as e:
        gr.Warning(f"Error checking code: {str(e)}")
        # Fall back to simple string comparison if LLM fails
        return user_code.strip() == solution.strip()
|
73 |
+
|
74 |
+
|
75 |
+
def on_user_logged_in(token: gr.OAuthToken | None):
    """Toggle two components depending on whether an OAuth token is present.

    Returns a pair of Gradio updates: the first is visible only when
    ``token`` is None, the second only when a token exists. The event wiring
    in this file maps them to the login button and the start button.
    """
    logged_in = token is not None
    return gr.update(visible=not logged_in), gr.update(visible=logged_in)
|
81 |
+
|
82 |
+
|
83 |
+
def push_results_to_hub(
    user_answers: list, token: gr.OAuthToken | None, signed_in_message: str
):
    """Grade the recorded answers and push them to the responses dataset.

    Args:
        user_answers: Answer dicts as built by the quiz handler; each must
            contain an ``"is_correct"`` key.
        token: OAuth token of the logged-in user, or None when logged out.
        signed_in_message: Value of the second wired input; it is only
            printed for debugging.

    Returns:
        A status message for the UI: a prompt to answer or log in, a "try
        again" message when the grade is below ``EXAM_PASSING_SCORE``, or a
        confirmation containing the final grade after a successful push.

    Raises:
        Whatever ``whoami`` or ``push_to_hub`` raise (network/auth errors);
        those are not caught here.
    """
    import re  # local import: only needed to build a valid split name

    print(f"signed_in_message: {signed_in_message}")

    if not user_answers:  # Check if there are any answers to submit
        gr.Warning("No answers to submit!")
        return "No answers to submit!"

    if token is None:
        gr.Warning("Please log in to Hugging Face before pushing!")
        return "Please log in to Hugging Face before pushing!"

    # Calculate grade. user_answers is known to be non-empty at this point,
    # so the division is safe without an extra guard.
    correct_count = sum(1 for answer in user_answers if answer["is_correct"])
    grade = correct_count / len(user_answers)
    passing_score = float(EXAM_PASSING_SCORE)

    if grade < passing_score:
        gr.Warning(
            f"Score {grade:.1%} below passing threshold of {passing_score:.1%}"
        )
        return f"You scored {grade:.1%}. Please try again to achieve at least {passing_score:.1%}"

    gr.Info("Submitting answers to the Hub. Please wait...", duration=2)

    user_info = whoami(token=token.token)
    username = user_info["name"]
    repo_id = f"{EXAM_DATASET_ID}_responses"
    submission_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    # One row per answer, each tagged with who submitted it, when, and the
    # overall grade of the attempt.
    submission_data = [
        {
            "username": username,
            "datetime": submission_time,
            "grade": grade,
            **answer,  # Include all answer data
        }
        for answer in user_answers
    ]

    try:
        # Try to load existing dataset
        existing_ds = load_dataset(repo_id)
        # Convert to DatasetDict if it isn't already
        if not isinstance(existing_ds, dict):
            existing_ds = DatasetDict({"default": existing_ds})
    except Exception:
        # If dataset doesn't exist, create empty DatasetDict
        # NOTE(review): this also swallows transient load failures; confirm
        # that pushing a dict holding only this user's split is acceptable
        # in that case.
        existing_ds = DatasetDict()

    # Create new dataset from submission
    new_ds = Dataset.from_list(submission_data)

    # Add or update the split for this user. Split names must match
    # ^\w+(\.\w+)*$, but usernames may contain characters such as "-", which
    # would make push_to_hub raise. Replace every non-word character with
    # "_"; the exact username is still stored in each row.
    split_name = re.sub(r"\W", "_", username)
    existing_ds[split_name] = new_ds

    # Push the updated dataset to the Hub
    existing_ds.push_to_hub(
        repo_id,
        private=True,  # Make it private by default since it contains student submissions
    )

    return f"Your responses have been submitted to the Hub! Final grade: {grade:.1%}"
|
150 |
+
|
151 |
+
|
152 |
+
def handle_quiz(question_idx, user_answers, submitted_code, is_start):
    """Advance the quiz by one step and return the new UI state.

    Args:
        question_idx: Index into ``quiz_data`` of the question on screen.
        user_answers: List of answer dicts recorded so far; graded answers
            are appended to it in place.
        submitted_code: Current content of the code editor.
        is_start: True when triggered by the start button, False for "Next".

    Returns:
        A 7-tuple matching the wired outputs: question markdown, code editor
        update, status text, new question index, answers list, start button
        update, and final-results markdown update.
    """
    # Hide the start button once the first question is shown
    start_btn_update = gr.update(visible=False) if is_start else None

    if is_start:
        # Starting the quiz always begins at the first question.
        question_idx = 0
    else:
        # The editor value can presumably be None when nothing was ever
        # entered (e.g. "Next" clicked before "Start"); treat it as empty
        # instead of crashing on None.strip().
        code = submitted_code or ""

        # Grade and record the submission only if there is code to check.
        if question_idx < len(quiz_data) and code.strip():
            current_q = quiz_data[question_idx]
            is_correct = check_code(
                code, current_q["solution"], current_q["challenge"]
            )
            user_answers.append(
                {
                    "challenge": current_q["challenge"],
                    "submitted_code": code,
                    "correct_solution": current_q["solution"],
                    "is_correct": is_correct,
                }
            )
        question_idx += 1

    # If we've reached the end, show final results
    if question_idx >= len(quiz_data):
        correct_count = sum(1 for answer in user_answers if answer["is_correct"])
        # No answers may have been recorded (every question skipped, or an
        # empty dataset); report 0% rather than dividing by zero.
        grade = correct_count / len(user_answers) if user_answers else 0.0
        passing_score = float(EXAM_PASSING_SCORE)

        sections = [
            f"**Quiz Complete!**\n\n"
            f"Your score: {grade:.1%}\n"
            f"Passing score: {passing_score:.1%}\n\n"
            f"Your answers:\n\n"
        ]
        for idx, answer in enumerate(user_answers):
            sections.append(
                f"Question {idx + 1}: {'✅' if answer['is_correct'] else '❌'}\n"
            )
            sections.append(
                f"Your code:\n```python\n{answer['submitted_code']}\n```\n\n"
            )
        results_text = "".join(sections)

        return (
            "",  # question_text becomes blank
            gr.update(value="", visible=False),  # clear and hide code input
            f"{'✅ Passed!' if grade >= passing_score else '❌ Did not pass'}",
            question_idx,
            user_answers,
            start_btn_update,
            gr.update(value=results_text, visible=True),  # show final_markdown
        )

    # Otherwise show the next question with its starter code.
    q = quiz_data[question_idx]
    challenge_text = f"## Question {question_idx + 1} \n### {q['challenge']}"
    return (
        challenge_text,
        gr.update(value=q["placeholder"], visible=True),
        "Submit your code solution and click 'Next' to continue.",
        question_idx,
        user_answers,
        start_btn_update,
        gr.update(visible=False),  # Hide final_markdown
    )
|
219 |
+
|
220 |
+
|
221 |
+
with gr.Blocks() as demo:
    demo.title = f"Coding Quiz: {EXAM_DATASET_ID}"

    # Per-session state: current question index and the answers so far.
    question_idx = gr.State(value=0)
    user_answers = gr.State(value=[])

    # Header and instructions.
    with gr.Row(variant="compact"):
        gr.Markdown(f"## Welcome to the {EXAM_DATASET_ID} Quiz")
    with gr.Row(variant="compact"):
        gr.Markdown(
            "Log in first, then click 'Start' to begin. Complete each coding challenge, click 'Next', "
            "and finally click 'Submit' to publish your results to the Hugging Face Hub."
        )

    # Question text and the editor for the student's answer.
    with gr.Row(variant="panel"):
        question_text = gr.Markdown("")
        code_input = gr.Code(language="python", label="Your Solution", visible=False)

    # Status line and the (initially hidden) final results.
    with gr.Row(variant="compact"):
        status_text = gr.Markdown("")

    with gr.Row(variant="compact"):
        final_markdown = gr.Markdown("", visible=False)

    next_btn = gr.Button("Next ⏭️")
    submit_btn = gr.Button("Submit ✅")

    with gr.Row(variant="compact"):
        login_btn = gr.LoginButton()
        start_btn = gr.Button("Start", visible=False)

    login_btn.click(fn=on_user_logged_in, inputs=None, outputs=[login_btn, start_btn])

    # Start and Next share one handler and one set of outputs; they differ
    # only in the is_start flag passed as the last input.
    quiz_outputs = [
        question_text,
        code_input,
        status_text,
        question_idx,
        user_answers,
        start_btn,
        final_markdown,
    ]
    for trigger, starts_quiz in ((start_btn, True), (next_btn, False)):
        trigger.click(
            fn=handle_quiz,
            inputs=[question_idx, user_answers, code_input, gr.State(starts_quiz)],
            outputs=quiz_outputs,
        )

    submit_btn.click(
        fn=push_results_to_hub,
        inputs=[user_answers, login_btn],
        outputs=status_text,
    )


if __name__ == "__main__":
    demo.launch()
|
example.json
ADDED
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[
|
2 |
+
{
|
3 |
+
"challenge": "Complete the function to calculate the factorial of a number using recursion",
|
4 |
+
"solution": "if n <= 1:\n return 1\nreturn n * factorial(n-1)",
|
5 |
+
"placeholder": "def factorial(n):\n # TODO: Implement recursive factorial calculation\n pass",
|
6 |
+
"context": "Factorial is the product of all positive integers less than or equal to n. For example, factorial(5) = 5 * 4 * 3 * 2 * 1 = 120. Base case is n=1 or n=0 which returns 1."
|
7 |
+
},
|
8 |
+
{
|
9 |
+
"challenge": "Implement a function to reverse a string without using built-in reverse methods",
|
10 |
+
"solution": "reversed_str = ''\nfor i in range(len(s)-1, -1, -1):\n reversed_str += s[i]\nreturn reversed_str",
|
11 |
+
"placeholder": "def reverse_string(s):\n # TODO: Implement string reversal\n pass",
|
12 |
+
"context": "String reversal can be done by iterating from the end to the beginning or using slicing with a negative step. This tests understanding of string manipulation and iteration."
|
13 |
+
},
|
14 |
+
{
|
15 |
+
"challenge": "Write a list comprehension that filters out all even numbers from the input list",
|
16 |
+
"solution": "return [num for num in numbers if num % 2 != 0]",
|
17 |
+
"placeholder": "def get_odd_numbers(numbers):\n # TODO: Filter odd numbers using list comprehension\n pass",
|
18 |
+
"context": "List comprehensions provide a concise way to create lists based on existing lists. The modulo operator % is used to test for odd/even numbers."
|
19 |
+
},
|
20 |
+
{
|
21 |
+
"challenge": "Complete the function to find the first non-repeating character in a string",
|
22 |
+
"solution": "char_count = {}\nfor char in s:\n char_count[char] = char_count.get(char, 0) + 1\nfor char in s:\n if char_count[char] == 1:\n return char\nreturn None",
|
23 |
+
"placeholder": "def first_non_repeating(s):\n # TODO: Find first non-repeating character\n pass",
|
24 |
+
"context": "This problem tests dictionary usage and string iteration. The solution involves counting character frequencies and then finding the first character with count 1."
|
25 |
+
},
|
26 |
+
{
|
27 |
+
"challenge": "Implement a function that checks if a string is a valid palindrome",
|
28 |
+
"solution": "s = ''.join(char.lower() for char in s if char.isalnum())\nreturn s == s[::-1]",
|
29 |
+
"placeholder": "def is_palindrome(s):\n # TODO: Check if string is palindrome (ignoring spaces and punctuation)\n pass",
|
30 |
+
"context": "Palindrome check requires string cleaning (removing spaces/punctuation), case normalization, and comparison. String slicing with [::-1] provides an efficient way to reverse strings in Python."
|
31 |
+
}
|
32 |
+
]
|
pyproject.toml
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[project]
|
2 |
+
name = "code-assignment-app"
|
3 |
+
version = "0.1.0"
|
4 |
+
description = "Add your description here"
|
5 |
+
readme = "README.md"
|
6 |
+
requires-python = ">=3.11"
|
7 |
+
dependencies = [
|
8 |
+
"datasets>=3.2.0",
|
9 |
+
"gradio[oauth]>=5.13.2",
|
10 |
+
"huggingface-hub>=0.28.0",
|
11 |
+
"ipykernel>=6.29.5",
|
12 |
+
]
|
uv.lock
ADDED
The diff for this file is too large to render.
See raw diff
|
|