Scherzando committed on
Commit
1deee38
·
1 Parent(s): fde429b

Initial Commit

Browse files
app.py ADDED
@@ -0,0 +1,183 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import json
3
+ from datetime import datetime
4
+ import random
5
+ import os
6
+ from huggingface_hub import Repository
7
+ random.seed(1234)
8
+
9
+ import subprocess
10
# Global Git identity, set before the repository is cloned
# (presumably used for the commits made by repo.push_to_hub()).
for _git_key, _git_value in (
    ("user.email", "[email protected]"),
    ("user.name", "yiduohao"),
):
    subprocess.run(["git", "config", "--global", _git_key, _git_value])


# Hub access token taken from the environment; only its absence is printed.
hf_token = os.environ.get("HF_TOKEN")
print("HF Token is none?", hf_token is None)

# Local clone of the dataset repository that receives the study responses.
DATASET_REPO_URL = "https://huggingface.co/datasets/Scherzando/RIR-Resound-User-Study-Response"
repo = Repository(
    clone_from=DATASET_REPO_URL,
    local_dir="user_responses",
    use_auth_token=hf_token,
)
25
+
26
def prepare_test_cases(json_path="rir/rir.json"):
    """Load the study manifest and assign each pair to randomized slots.

    The manifest is a JSON object mapping a clip id to an entry with the keys
    ``'ours'``, ``'baseline'`` and ``'groundtruth'``.  For every entry the two
    candidate values are stored in random order under ``'Video 1'`` and
    ``'Video 2'``, and the reference is mirrored under ``'Ground Truth'``.

    Args:
        json_path: Path of the manifest file.  Defaults to the path this app
            has always read.

    Returns:
        The loaded dict, with the three extra keys added to every entry.

    Raises:
        FileNotFoundError: If the manifest file does not exist.
        KeyError: If an entry lacks one of the three required keys.
    """
    with open(json_path, "r", encoding="utf-8") as f:
        video_dict = json.load(f)

    for entry in video_dict.values():
        # The coin flip and the shuffle are both kept so that the sequence of
        # draws from the seeded global RNG (and thus the layout) is unchanged.
        if random.random() > 0.5:
            candidates = [entry['ours'], entry['baseline']]
        else:
            candidates = [entry['baseline'], entry['ours']]
        random.shuffle(candidates)

        entry['Video 1'], entry['Video 2'] = candidates
        entry['Ground Truth'] = entry['groundtruth']

    return video_dict
46
+
47
# Randomized manifest, built once when the module is imported.
video_dict = prepare_test_cases()

# Order in which the clips are presented, shuffled with the global RNG.
video_ids = [*video_dict]
random.shuffle(video_ids)


# Questions asked for every clip; answers are stored in this same order.
questions = [
    "Between Video 1 and Video 2, which one's audio conveyed changes in audio volume more accurately compared to the Reference?",
    "Between Video 1 and Video 2, which one's audio made it easier for you to identify the direction of the sound source more accurately?",
    "Between Video 1 and Video 2, which one's audio do you feel aligns better with the Reference overall?",
]

# JSON Lines log of submissions, located inside the "user_responses" folder.
submissions_file = "user_responses/response.jsonl"
58
+
59
def has_already_submitted(user_id):
    """Return whether ``user_id`` already appears in the submissions log.

    The log at ``submissions_file`` is read as JSON Lines; a record matches
    when its ``"u_id"`` field equals ``user_id``.

    Args:
        user_id: Identifier to look for.

    Returns:
        True if a matching record exists, otherwise False (including when the
        log file has not been created yet).

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    try:
        log = open(submissions_file, "r", encoding="utf-8")
    except FileNotFoundError:
        # Nothing has been submitted yet.
        return False

    with log:
        for line in log:
            # A blank line (e.g. a stray trailing newline) is not a record and
            # would otherwise make json.loads raise.
            if not line.strip():
                continue
            if json.loads(line).get("u_id") == user_id:
                return True
    return False
67
+
68
+ # Save responses
69
def save_responses(unique_submission, *responses):
    """Validate one participant's answers, append them to the log and push it.

    Args:
        unique_submission: When true, refuse a submission whose session id is
            already present in the submissions log.
        *responses: The selected radio value for every question of every clip
            (``len(questions)`` values per entry of ``video_ids``, in display
            order), followed by the session-info dict as the last element.

    Returns:
        A status message to show to the participant.
    """
    timestamp = datetime.now().isoformat()
    *answers, info = responses

    # The session info is produced by a separate page-load event, so it can
    # still be empty if Submit is pressed before that event has completed.
    if not info:
        return "Session information is not available yet. Please reload the page and try again."
    user_id = str(info["session_id"])

    # Check for unique submission
    if unique_submission and has_already_submitted(user_id):
        return "You have already submitted responses. Thank you for participating!"

    per_clip = len(questions)
    collected = []
    for index, video_id in enumerate(video_ids):
        start = index * per_clip
        response = answers[start:start + per_clip]
        # A short slice means answers are missing just like a None value does.
        if len(response) < per_clip or any(r is None for r in response):
            return "Please answer all questions before submitting."

        # Each answer is "Video 1" or "Video 2"; look up what that slot held.
        entry = video_dict[video_id]
        collected.append({
            video_id: {
                'distance': entry[response[0]],
                'direction': entry[response[1]],
                'overall': entry[response[2]],
            }
        })

    # Every element is a one-key dict, so order by that key (the clip id).
    # Using the dict_keys view itself as the sort key compares views as sets
    # (subset tests), which never reorders single-key dicts.
    collected.sort(key=lambda item: next(iter(item)))

    result = {
        "u_id": user_id,
        "timestamp": timestamp,
        "responses": collected,
    }

    # Save response locally and push to Hugging Face Hub
    with open(submissions_file, "a", encoding="utf-8") as f:
        f.write(json.dumps(result) + "\n")

    # Push changes to the Hugging Face dataset repo
    repo.push_to_hub()

    return "All responses saved! Thank you for participating!"
115
+
116
+
117
+
118
+
119
def create_interface(unique_submission=False):
    """Build the Gradio Blocks page for the preference study.

    For every clip id in ``video_ids`` the page shows the reference and the
    two candidates side by side, followed by one radio group per entry of
    ``questions``.  A hidden JSON component is filled by ``predict`` when the
    page loads and is passed, after all radio values, to ``save_responses``
    when Submit is clicked.

    Args:
        unique_submission: Forwarded to ``save_responses`` as its first
            argument.

    Returns:
        The constructed ``gr.Blocks`` object (not launched).
    """
    with gr.Blocks() as demo:
        gr.Markdown("# Human Preference Study: Room Spatial Audio Rendering")
        gr.Markdown("""
        Before starting the study, please make sure you are in a quiet environment and wearing headphones, and read the following guidance carefully.
        - In this study, you will be presented with pairs of videos **with spatial audio**.
        - Each pair consists of a reference spatial audio (marked by **Reference**) and two generated spatial audio (marked by **Video 1** and **Video 2**).
        - Please watch and **listen** to each row of videos carefully and answer the three associated questions.
        - For each video, the left-hand side is the camera (head) view, and the right-hand side is the corresponding birds-eye view of the room with speaker and head poses.

        **Binaural Headphones are required!**
        """)

        # Display video pairs and questions
        responses = []
        for index, video_id in enumerate(video_ids):
            entry = video_dict[video_id]

            gr.Markdown(f"### Video Pair {index + 1}")
            with gr.Row():
                gr.Video(entry['groundtruth'], label="Reference")
                gr.Video(entry['Video 1'], label="Video 1")
                gr.Video(entry['Video 2'], label="Video 2")

            # One radio group per question, each in its own row; the radios
            # are collected in display order because the submit handler
            # receives their values positionally.
            for question in questions:
                with gr.Row():
                    responses.append(gr.Radio(["Video 1", "Video 2"], label=question, value=None))

            gr.Markdown("---")

        # Hidden holder for the request details returned by predict on load.
        info = gr.JSON(visible=False)
        demo.load(predict, None, info)

        submit_btn = gr.Button("Submit")
        result_message = gr.Textbox(label="Message (please only submit once)", interactive=False)

        submit_btn.click(
            fn=lambda *args: save_responses(unique_submission, *args),
            inputs=responses + [info],
            outputs=result_message,
        )

    return demo
167
+
168
def predict(request: gr.Request):
    """Gather details of the incoming request into a dict.

    Returns:
        A dict with the client host under ``"ip"``, the ``user-agent`` header
        under ``"user_agent"``, the request's headers object under
        ``"headers"`` and the request's session hash under ``"session_id"``.
    """
    request_headers = request.headers
    return {
        "ip": request.client.host,
        "user_agent": request_headers["user-agent"],
        "headers": request_headers,
        "session_id": request.session_hash,
    }
179
+
180
if __name__ == "__main__":
    # Duplicate-submission checking is always switched on here; the visible
    # code does not parse any command-line flag to control it.
    demo = create_interface(unique_submission=True)
    demo.launch(share=True)
rir/baseline/combined_bene_tx_rotation_ele.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f501deb9a53ddaeb90d07275d54906ee0998646676783c18fdf6a79862462b9e
3
+ size 5869131
rir/baseline/combined_bene_vanilla.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a4504e141c25fc6f036dccd9ef200db2948f5bab5040f64c327059f56234491d
3
+ size 8269383
rir/baseline/combined_pomaria_tx_moving_vanilla.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:adf3c069d9801c0efbb97ee86d0eb4e7d373b0b9f3da02a453c874a17b9abe41
3
+ size 9600134
rir/baseline/combined_pomaria_tx_rotation_ele.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:48173e1a932307dda06cf3050c4233b7ca468c24530e8d5bb8b0bbeb90567ba7
3
+ size 8442679
rir/baseline/combined_pomaria_vanilla.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e5d249eef893134935af384de1e877e01fbca724683e736a84e3f4db7471d08d
3
+ size 9534331
rir/baseline/combined_rs_tx_rotation1_ele.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a0027e90b39121a8cd26e912383a43c935afea0b45c8fd6d7fdc70bb8229059b
3
+ size 7458436
rir/baseline/combined_rs_tx_rotation2_ele.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:696a1c4657855d127fbd0bddba4f2706579ec2e05fcf9ed8d12763d43503358a
3
+ size 6808873
rir/baseline/combined_rs_vanilla.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1834bc234fef9585b714bf86a0d072bc8a8dafd77b0ee16a24046f6a084ec21a
3
+ size 9761008
rir/groundtruth/combined_bene_gt.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aa037d01e53bd5d3d840a130a95398c2b3018ad1558d9d380040f80317237107
3
+ size 8269383
rir/groundtruth/combined_bene_tx_rotation_gt.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0b68ca74647aac794e5eafb0a9ce7bdebb618939e05c32a8a3dc630acb29f948
3
+ size 5869131
rir/groundtruth/combined_pomaria_gt.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eb3229d91ff91f06670fe1c6fde92a62a90c8961a2690b877156da29d4818fcd
3
+ size 9534331
rir/groundtruth/combined_pomaria_tx_moving_gt.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fc7212ba964aa73710a32af715d187b3e657b2a1af8efc534ab684b897a35d97
3
+ size 9600134
rir/groundtruth/combined_pomaria_tx_rotation_gt.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a47a73036d818a7cb10e7e4e59abbdc1dd913bd673e2d571c2e4585c243c65d9
3
+ size 8442679
rir/groundtruth/combined_rs_gt.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b82a5d68d2973dc1195d44616182b1f047aed72e288e5f968805fa35897354ce
3
+ size 9761008
rir/groundtruth/combined_rs_tx_rotation1_gt.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c8ba7802becbdff0b2a4ce57bcbdcdf6a2597a4ed24d121ee7484e4613dbcb2f
3
+ size 7458436
rir/groundtruth/combined_rs_tx_rotation2_gt.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:089422fa95fda67f7a5eef34bc1895f20b09f9934f6185894cbcf55a214235c3
3
+ size 6808873
rir/ours/combined_bene_ele.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3f6cf778948ce2fcb52153006b20ab288ae134ad790a507ba835167ed3ba6575
3
+ size 8269383
rir/ours/combined_bene_tx_rotation_ssl.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c941f16ff17fe7bbd3bea73620fac2b0e435bdee5dd27670ba1838fd5843b825
3
+ size 5869131
rir/ours/combined_pomaria_ele.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:397d10e0125fba7d3cbf512b391a6482697d69edfb8700e089746f8ef41db506
3
+ size 9534331
rir/ours/combined_pomaria_tx_moving_ele.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7906b8219868ace909e7611853525db4ed58f32d0723684bb76b2745628be9c4
3
+ size 9600134
rir/ours/combined_pomaria_tx_rotation_ssl.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aebf1533cbda8ee3aa5287324fd7ac4ab145da348e6321b8581ee1526a16bb38
3
+ size 8442679
rir/ours/combined_rs_ele.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c8050eafe06330285e049e8aee2b079658c7f42f2064603a16405d3fd4017fdc
3
+ size 9761008
rir/ours/combined_rs_tx_rotation1_ssl.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a24f8cb18cb242a63982133438215f85dc68fb45a1744c5db0df5dc3c78b672a
3
+ size 7458436
rir/ours/combined_rs_tx_rotation2_ssl.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bdd53a6d21c1fdce48732b8120b575deaaf5e30679b8928c139c3a3608f75d1b
3
+ size 6808873
rir/rir.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bene": {
3
+ "ours": "rir/ours/combined_bene_ele.mp4",
4
+ "baseline": "rir/baseline/combined_bene_vanilla.mp4",
5
+ "groundtruth": "rir/groundtruth/combined_bene_gt.mp4"
6
+ },
7
+ "bene_tx_rotation": {
8
+ "ours": "rir/ours/combined_bene_tx_rotation_ssl.mp4",
9
+ "baseline": "rir/baseline/combined_bene_tx_rotation_ele.mp4",
10
+ "groundtruth": "rir/groundtruth/combined_bene_tx_rotation_gt.mp4"
11
+ },
12
+ "pomaria": {
13
+ "ours": "rir/ours/combined_pomaria_ele.mp4",
14
+ "baseline": "rir/baseline/combined_pomaria_vanilla.mp4",
15
+ "groundtruth": "rir/groundtruth/combined_pomaria_gt.mp4"
16
+ },
17
+ "pomaria_tx_rotation": {
18
+ "ours": "rir/ours/combined_pomaria_tx_rotation_ssl.mp4",
19
+ "baseline": "rir/baseline/combined_pomaria_tx_rotation_ele.mp4",
20
+ "groundtruth": "rir/groundtruth/combined_pomaria_tx_rotation_gt.mp4"
21
+ },
22
+ "pomaria_tx_moving": {
23
+ "ours": "rir/ours/combined_pomaria_tx_moving_ele.mp4",
24
+ "baseline": "rir/baseline/combined_pomaria_tx_moving_vanilla.mp4",
25
+ "groundtruth": "rir/groundtruth/combined_pomaria_tx_moving_gt.mp4"
26
+ },
27
+ "rs": {
28
+ "ours": "rir/ours/combined_rs_ele.mp4",
29
+ "baseline": "rir/baseline/combined_rs_vanilla.mp4",
30
+ "groundtruth": "rir/groundtruth/combined_rs_gt.mp4"
31
+ },
32
+ "rs_tx_rotation1": {
33
+ "ours": "rir/ours/combined_rs_tx_rotation1_ssl.mp4",
34
+ "baseline": "rir/baseline/combined_rs_tx_rotation1_ele.mp4",
35
+ "groundtruth": "rir/groundtruth/combined_rs_tx_rotation1_gt.mp4"
36
+ },
37
+ "rs_tx_rotation2": {
38
+ "ours": "rir/ours/combined_rs_tx_rotation2_ssl.mp4",
39
+ "baseline": "rir/baseline/combined_rs_tx_rotation2_ele.mp4",
40
+ "groundtruth": "rir/groundtruth/combined_rs_tx_rotation2_gt.mp4"
41
+ }
42
+ }