import gradio as gr
import json
import os
import random
import subprocess
from datetime import datetime

from huggingface_hub import Repository

# Fixed seed so every participant sees the same pairing and ordering
random.seed(20240128)

# Set Git user information for commits made by the Repository object
subprocess.run(["git", "config", "--global", "user.email", "david.yd.hao@outlook.com"])
subprocess.run(["git", "config", "--global", "user.name", "yiduohao"])

hf_token = os.getenv("HF_TOKEN")
print("HF Token is none?", hf_token is None)

# Initialize the dataset repository that collects the user responses
DATASET_REPO_URL = "https://huggingface.co/datasets/Scherzando/RIR-Resound-User-Study-Response"
repo = Repository(
    local_dir="user_responses",
    clone_from=DATASET_REPO_URL,
    use_auth_token=hf_token,
)


def prepare_test_cases():
    """Load the test cases and assign 'ours'/'baseline' to 'Video 1'/'Video 2' in random order."""
    # Each entry in rir.json is expected to map a video ID to its
    # "ours", "baseline", and "groundtruth" video paths.
    # json_path = "videos/videos.json"
    json_path = "rir/rir.json"
    with open(json_path, "r") as f:
        video_dict = json.load(f)

    for video_id in video_dict:
        video_list = [video_dict[video_id]["ours"], video_dict[video_id]["baseline"]]
        # The shuffle alone randomizes which method appears as Video 1 vs. Video 2
        random.shuffle(video_list)
        video_dict[video_id]["Video 1"] = video_list[0]
        video_dict[video_id]["Video 2"] = video_list[1]
        video_dict[video_id]["Ground Truth"] = video_dict[video_id]["groundtruth"]
    return video_dict


video_dict = prepare_test_cases()
video_ids = list(video_dict.keys())
random.shuffle(video_ids)

questions = [
    "Between Video 1 and Video 2, which one's audio conveyed changes in audio volume more accurately compared to the Reference?",
    "Between Video 1 and Video 2, which one's audio made it easier for you to identify the direction of the sound source?",
    "Between Video 1 and Video 2, which one's audio do you feel aligns better with the Reference overall?",
]

submissions_file = "user_responses/response.jsonl"


def has_already_submitted(user_id):
    """Return True if a response with this user ID has already been recorded."""
    if os.path.exists(submissions_file):
        with open(submissions_file, "r") as f:
            for line in f:
                submission = json.loads(line)
                if submission.get("u_id") == user_id:
                    return True
    return False


def save_responses(unique_submission, *responses):
    """Validate the form, append one JSONL record, and push it to the dataset repo."""
    timestamp = datetime.now().isoformat()

    # The last input is the hidden session-info JSON; the rest are radio answers
    info = responses[-1]
    responses = responses[:-1]
    user_id = info["session_id"]

    # Optionally reject repeat submissions from the same session
    if unique_submission and has_already_submitted(user_id):
        return "You have already submitted responses. Thank you for participating!"

    result = {
        "u_id": user_id,
        "timestamp": timestamp,
        "responses": [],
    }

    for index, video_id in enumerate(video_ids):
        start_idx = index * len(questions)
        response = responses[start_idx:start_idx + len(questions)]
        if any(r is None for r in response):
            return "Please answer all questions before submitting."

        # Map each selected label ("Video 1"/"Video 2") back to the underlying video
        pair_response = {
            video_id: {
                "distance": video_dict[video_id][response[0]],
                "direction": video_dict[video_id][response[1]],
                "overall": video_dict[video_id][response[2]],
            }
        }
        result["responses"].append(pair_response)

    # Sort by video ID; each entry is a single-key dict, so sort on that key
    # (sorting on x.keys() directly would do set comparison, not ordering)
    result["responses"] = sorted(result["responses"], key=lambda x: next(iter(x)))

    # Save the response locally, then push it to the Hugging Face dataset repo
    with open(submissions_file, "a") as f:
        f.write(json.dumps(result) + "\n")
    repo.push_to_hub()

    return "All responses saved! Thank you for participating!"
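
# For reference, each line appended to response.jsonl has roughly the shape
# below (illustrative values; the "distance"/"direction"/"overall" fields hold
# whichever video entry the participant picked, i.e. the "ours" or "baseline"
# path from rir/rir.json):
#
#   {
#     "u_id": "<gradio session hash>",
#     "timestamp": "2024-01-28T12:34:56.789012",
#     "responses": [
#       {"<video_id>": {"distance": "<video path>",
#                       "direction": "<video path>",
#                       "overall": "<video path>"}}
#     ]
#   }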
def create_interface(unique_submission=False):
    with gr.Blocks() as demo:
        gr.Markdown("# Human Preference Study: Room Spatial Audio Rendering")
        gr.Markdown("""
        Before starting the study, please make sure you are in a quiet environment and wearing headphones, and read the following guidance carefully.

        - In this study, you will be presented with pairs of videos **with spatial audio**.
        - Each pair consists of a reference spatial audio track (marked by **Reference**) and two generated spatial audio tracks (marked by **Video 1** and **Video 2**).
        - Please watch and **listen** to each row of videos carefully and answer the three associated questions.
        - For each video, the left-hand side is the camera (head) view, and the right-hand side is the corresponding bird's-eye view of the room with the **speaker (blue)** and **head poses (red)**.

        **Binaural headphones are required!**
        """)

        # Display video pairs and their associated questions
        responses = []
        for index, video_id in enumerate(video_ids):
            video_gt = video_dict[video_id]["groundtruth"]
            video1 = video_dict[video_id]["Video 1"]
            video2 = video_dict[video_id]["Video 2"]

            gr.Markdown(f"### Video Pair {index + 1}")
            with gr.Row():
                gr.Video(video_gt, label="Reference")
                gr.Video(video1, label="Video 1")
                gr.Video(video2, label="Video 2")
            with gr.Row():
                responses.append(gr.Radio(["Video 1", "Video 2"], label=questions[0], value=None))
            with gr.Row():
                responses.append(gr.Radio(["Video 1", "Video 2"], label=questions[1], value=None))
            with gr.Row():
                responses.append(gr.Radio(["Video 1", "Video 2"], label=questions[2], value=None))
            gr.Markdown("---")

        # Hidden JSON component that captures session info when the page loads
        info = gr.JSON(visible=False)
        demo.load(predict, None, info)

        submit_btn = gr.Button("Submit")
        result_message = gr.Textbox(label="Message (please only submit once)", interactive=False)
        submit_btn.click(
            fn=lambda *args: save_responses(unique_submission, *args),
            inputs=responses + [info],
            outputs=result_message,
        )
    return demo


def predict(request: gr.Request):
    """Collect request metadata used to identify the session."""
    host = request.client.host
    user_agent = request.headers["user-agent"]
    session_id = request.session_hash
    return {
        "ip": host,
        "user_agent": user_agent,
        # Convert the Headers object to a plain dict so it is JSON-serializable
        "headers": dict(request.headers),
        "session_id": session_id,
    }


if __name__ == "__main__":
    # unique_submission=True rejects repeat submissions from the same session
    demo = create_interface(unique_submission=True)
    demo.launch(share=True)
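
# A minimal sketch of wiring unique_submission to a `--unique` CLI flag via
# argparse (hypothetical; the script above simply hardcodes True instead):
#
#   import argparse
#
#   if __name__ == "__main__":
#       parser = argparse.ArgumentParser()
#       parser.add_argument("--unique", action="store_true",
#                           help="reject repeat submissions from the same session")
#       args = parser.parse_args()
#       demo = create_interface(unique_submission=args.unique)
#       demo.launch(share=True)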