ndurner commited on
Commit
1c05cb6
·
1 Parent(s): 90b6fdb

well-defined export files

Browse files
Files changed (4) hide show
  1. README.md +1 -1
  2. app.py +36 -55
  3. chat_export.py +209 -0
  4. requirements.txt +1 -1
README.md CHANGED
@@ -4,7 +4,7 @@ emoji: 🤖
4
  colorFrom: yellow
5
  colorTo: gray
6
  sdk: gradio
7
- sdk_version: 5.13.1
8
  app_file: app.py
9
  pinned: false
10
  license: mit
 
4
  colorFrom: yellow
5
  colorTo: gray
6
  sdk: gradio
7
+ sdk_version: 5.7.1
8
  app_file: app.py
9
  pinned: false
10
  license: mit
app.py CHANGED
@@ -7,6 +7,7 @@ import fitz
7
  from PIL import Image
8
  import io
9
  from settings_mgr import generate_download_settings_js, generate_upload_settings_js
 
10
 
11
  from doc2json import process_docx
12
  from code_exec import eval_restricted_script
@@ -118,7 +119,8 @@ def encode_file(fn: str) -> list:
118
  user_msg_parts.append({"type": "image_url",
119
  "image_url":{"url": content}})
120
  else:
121
- user_msg_parts.append({"type": "text", "text": content})
 
122
 
123
  return user_msg_parts
124
 
@@ -153,14 +155,15 @@ def bot(message, history, oai_key, system_prompt, seed, temperature, max_tokens,
153
  if model == "whisper":
154
  result = ""
155
  whisper_prompt = system_prompt
156
- for human, assi in history:
157
- if human is not None:
158
- if type(human) is tuple:
 
159
  pass
160
  else:
161
- whisper_prompt += f"\n{human}"
162
- if assi is not None:
163
- whisper_prompt += f"\n{assi}"
164
 
165
  if message["text"]:
166
  whisper_prompt += message["text"]
@@ -231,19 +234,24 @@ def bot(message, history, oai_key, system_prompt, seed, temperature, max_tokens,
231
  role = "developer"
232
  history_openai_format.append({"role": role, "content": system_prompt})
233
 
234
- for human, assi in history:
235
- if human is not None:
236
- if type(human) is tuple:
237
- user_msg_parts.extend(encode_file(human[0]))
 
 
 
 
 
238
  else:
239
- user_msg_parts.append({"type": "text", "text": human})
240
 
241
- if assi is not None:
242
  if user_msg_parts:
243
  history_openai_format.append({"role": "user", "content": user_msg_parts})
244
  user_msg_parts = []
245
 
246
- history_openai_format.append({"role": "assistant", "content": assi})
247
 
248
  if message["text"]:
249
  user_msg_parts.append({"type": "text", "text": message["text"]})
@@ -378,28 +386,16 @@ def bot(message, history, oai_key, system_prompt, seed, temperature, max_tokens,
378
  except Exception as e:
379
  raise gr.Error(f"Error: {str(e)}")
380
 
381
- def import_history(history, file):
382
- with open(file.name, mode="rb") as f:
383
- content = f.read()
384
-
385
- if isinstance(content, bytes):
386
- content = content.decode('utf-8', 'replace')
387
- else:
388
- content = str(content)
389
- os.remove(file.name)
390
-
391
- # Deserialize the JSON content
392
- import_data = json.loads(content)
393
-
394
- # Check if 'history' key exists for backward compatibility
395
- if 'history' in import_data:
396
- history = import_data['history']
397
- system_prompt.value = import_data.get('system_prompt', '') # Set default if not present
398
- else:
399
- # Assume it's an old format with only history data
400
- history = import_data
401
 
402
- return history, system_prompt.value # Return system prompt value to be set in the UI
 
403
 
404
  with gr.Blocks(delete_cache=(86400, 86400)) as demo:
405
  gr.Markdown("# OAI Chat (Nils' Version™️)")
@@ -456,7 +452,7 @@ with gr.Blocks(delete_cache=(86400, 86400)) as demo:
456
  dl_settings_button.click(None, controls, js=generate_download_settings_js("oai_chat_settings.bin", control_ids))
457
  ul_settings_button.click(None, None, None, js=generate_upload_settings_js(control_ids))
458
 
459
- chat = gr.ChatInterface(fn=bot, multimodal=True, additional_inputs=controls, autofocus = False)
460
  chat.textbox.file_count = "multiple"
461
  chat.textbox.max_plain_text_length = 2**31
462
  chatbot = chat.chatbot
@@ -472,24 +468,7 @@ with gr.Blocks(delete_cache=(86400, 86400)) as demo:
472
  with gr.Accordion("Import/Export", open = False):
473
  import_button = gr.UploadButton("History Import")
474
  export_button = gr.Button("History Export")
475
- export_button.click(lambda: None, [chatbot, system_prompt], js="""
476
- (chat_history, system_prompt) => {
477
- const export_data = {
478
- history: chat_history,
479
- system_prompt: system_prompt
480
- };
481
- const history_json = JSON.stringify(export_data);
482
- const blob = new Blob([history_json], {type: 'application/json'});
483
- const url = URL.createObjectURL(blob);
484
- const a = document.createElement('a');
485
- a.href = url;
486
- a.download = 'chat_history.json';
487
- document.body.appendChild(a);
488
- a.click();
489
- document.body.removeChild(a);
490
- URL.revokeObjectURL(url);
491
- }
492
- """)
493
  dl_button = gr.Button("File download")
494
  dl_button.click(lambda: None, [chatbot], js="""
495
  (chat_history) => {
@@ -544,7 +523,9 @@ with gr.Blocks(delete_cache=(86400, 86400)) as demo:
544
  }
545
  }
546
  """)
547
- import_button.upload(import_history, inputs=[chatbot, import_button], outputs=[chatbot, system_prompt])
 
 
548
 
549
  demo.unload(lambda: [os.remove(file) for file in temp_files])
550
  demo.queue(default_concurrency_limit = None).launch()
 
7
  from PIL import Image
8
  import io
9
  from settings_mgr import generate_download_settings_js, generate_upload_settings_js
10
+ from chat_export import import_history, get_export_js
11
 
12
  from doc2json import process_docx
13
  from code_exec import eval_restricted_script
 
119
  user_msg_parts.append({"type": "image_url",
120
  "image_url":{"url": content}})
121
  else:
122
+ fn = os.path.basename(fn)
123
+ user_msg_parts.append({"type": "text", "text": f"```{fn}\n{content}\n```"})
124
 
125
  return user_msg_parts
126
 
 
155
  if model == "whisper":
156
  result = ""
157
  whisper_prompt = system_prompt
158
+ for msg in history:
159
+ content = msg["content"]
160
+ if msg["role"] == "user":
161
+ if type(content) is tuple:
162
  pass
163
  else:
164
+ whisper_prompt += f"\n{content}"
165
+ if msg["role"] == "assistant":
166
+ whisper_prompt += f"\n{content}"
167
 
168
  if message["text"]:
169
  whisper_prompt += message["text"]
 
234
  role = "developer"
235
  history_openai_format.append({"role": role, "content": system_prompt})
236
 
237
+ for msg in history:
238
+ role = msg["role"]
239
+ content = msg["content"]
240
+
241
+ if role == "user":
242
+ if isinstance(content, gr.File) or isinstance(content, gr.Image):
243
+ user_msg_parts.extend(encode_file(content.value['path']))
244
+ elif isinstance(content, tuple):
245
+ user_msg_parts.extend(encode_file(content[0]))
246
  else:
247
+ user_msg_parts.append({"type": "text", "text": content})
248
 
249
+ if role == "assistant":
250
  if user_msg_parts:
251
  history_openai_format.append({"role": "user", "content": user_msg_parts})
252
  user_msg_parts = []
253
 
254
+ history_openai_format.append({"role": "assistant", "content": content})
255
 
256
  if message["text"]:
257
  user_msg_parts.append({"type": "text", "text": message["text"]})
 
386
  except Exception as e:
387
  raise gr.Error(f"Error: {str(e)}")
388
 
389
def import_history_guarded(oai_key, history, file):
    """Gate the history import behind an OpenAI credential check.

    Validates ``oai_key`` by retrieving a known model before touching the
    uploaded file, so an unauthenticated caller cannot trigger an import.

    Args:
        oai_key: OpenAI API key entered in the UI.
        history: current chat history (forwarded to ``import_history``).
        file: uploaded history file (forwarded to ``import_history``).

    Returns:
        Whatever ``import_history`` returns: (chat_history, system_prompt_value).

    Raises:
        gr.Error: if the credential check fails.
    """
    # Check credentials first — the model retrieve is a cheap round trip
    # that fails fast on a bad/revoked key.
    try:
        client = OpenAI(api_key=oai_key)
        client.models.retrieve("gpt-4o")
    except Exception as e:
        # Chain the cause so the original OpenAI error stays in the traceback.
        raise gr.Error(f"OpenAI login error: {str(e)}") from e

    # Actual import.
    return import_history(history, file)
399
 
400
  with gr.Blocks(delete_cache=(86400, 86400)) as demo:
401
  gr.Markdown("# OAI Chat (Nils' Version™️)")
 
452
  dl_settings_button.click(None, controls, js=generate_download_settings_js("oai_chat_settings.bin", control_ids))
453
  ul_settings_button.click(None, None, None, js=generate_upload_settings_js(control_ids))
454
 
455
+ chat = gr.ChatInterface(fn=bot, multimodal=True, additional_inputs=controls, autofocus = False, type = "messages")
456
  chat.textbox.file_count = "multiple"
457
  chat.textbox.max_plain_text_length = 2**31
458
  chatbot = chat.chatbot
 
468
  with gr.Accordion("Import/Export", open = False):
469
  import_button = gr.UploadButton("History Import")
470
  export_button = gr.Button("History Export")
471
+ export_button.click(lambda: None, [chatbot, system_prompt], js=get_export_js())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
472
  dl_button = gr.Button("File download")
473
  dl_button.click(lambda: None, [chatbot], js="""
474
  (chat_history) => {
 
523
  }
524
  }
525
  """)
526
+ import_button.upload(import_history_guarded,
527
+ inputs=[oai_key, chatbot, import_button],
528
+ outputs=[chatbot, system_prompt])
529
 
530
  demo.unload(lambda: [os.remove(file) for file in temp_files])
531
  demo.queue(default_concurrency_limit = None).launch()
chat_export.py ADDED
@@ -0,0 +1,209 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import base64
3
+ import os, io
4
+ import mimetypes
5
+ from PIL import Image
6
+ import gradio as gr
7
+
8
def import_history(history, file):
    """Load an exported chat history file and convert it to gradio "messages" format.

    Two on-disk layouts are supported:
      * new OpenAI-style export: ``{"messages": [{"role": ..., "content": ...}, ...]}``
        where user content may be a list of typed parts (text / image_url / file);
      * legacy export: ``{"history": [[user, assistant], ...], "system_prompt": ...}``
        or a bare list of ``[user, assistant]`` pairs.

    Args:
        history: current chat history (unused; kept for the gradio event signature).
        file: uploaded file object exposing a ``.name`` path attribute.

    Returns:
        tuple: ``(chat_history, system_prompt_value)`` — ``chat_history`` is a list
        of ``{"role", "content"}`` dicts; media content becomes gr.Image / gr.File.

    Raises:
        ValueError: if the upload exceeds 100 MB or an embedded file exceeds 15 MB.
    """
    # Reject oversized uploads before reading them into memory.
    if os.path.getsize(file.name) > 100e6:
        raise ValueError("History larger than 100 MB")

    with open(file.name, mode="rb") as f:
        content = f.read().decode('utf-8', 'replace')

    import_data = json.loads(content)

    # Dispatch on format; a bare legacy list fails the 'messages' membership test.
    if 'messages' in import_data:
        return _import_messages_format(import_data['messages'], file)
    return _import_legacy_format(import_data, file)


def _decode_data_uri(data_uri):
    # "data:<mime>;base64,<payload>" -> raw payload bytes.
    return base64.b64decode(data_uri.split(',')[1])


def _write_sidecar_file(file, fname, file_data):
    # Materialize embedded file content next to the uploaded history file so
    # gradio can serve it. basename() guards against path traversal through
    # attacker-controlled names inside the import.
    fname = os.path.basename(fname)
    temp_path = os.path.join(os.path.dirname(file.name), fname)
    with open(temp_path, "wb") as tempf:
        tempf.write(file_data)
    return gr.File(value=temp_path, label=fname)


def _import_messages_format(messages, file):
    # New OpenAI-style format: a flat list of role/content messages.
    system_prompt_value = ''
    chat_history = []

    msg_num = 1
    for msg in messages:
        if msg['role'] == 'system':
            # System prompt is surfaced separately in the UI, not in the chat.
            system_prompt_value = msg['content']
            continue

        if msg['role'] == 'user':
            content = msg['content']
            if isinstance(content, list):
                # Multimodal content: a list of typed parts.
                for item in content:
                    if item.get('type', '') == 'image_url':
                        # Rebuild a gr.Image from the embedded data URI.
                        image_data = _decode_data_uri(item['image_url']['url'])
                        img = Image.open(io.BytesIO(image_data))
                        chat_history.append({
                            "role": msg['role'],
                            "content": gr.Image(value=img)
                        })
                    elif item.get('type', '') == 'file':
                        fname = os.path.basename(item['file'].get('name', f'download{msg_num}'))
                        file_data = _decode_data_uri(item['file']['url'])
                        # Per-file cap, independent of the whole-upload cap.
                        if len(file_data) > 15e6:
                            raise ValueError(f"file content `{fname}` larger than 15 MB")
                        chat_history.append({
                            "role": msg['role'],
                            "content": _write_sidecar_file(file, fname, file_data)
                        })
                    else:
                        # Unknown part type: keep the raw message.
                        chat_history.append(msg)
            else:
                chat_history.append(msg)

        elif msg['role'] == 'assistant':
            chat_history.append(msg)

        msg_num = msg_num + 1

    return chat_history, system_prompt_value


def _import_legacy_format(import_data, file):
    # Legacy format: list of [user, assistant] pairs, optionally wrapped in
    # {"history": ..., "system_prompt": ...}.
    if 'history' in import_data:
        legacy_history = import_data['history']
        system_prompt_value = import_data.get('system_prompt', '')
    else:
        legacy_history = import_data
        system_prompt_value = ''

    chat_history = []
    for pair in legacy_history:
        if pair[0]:  # user message
            if isinstance(pair[0], dict) and 'file' in pair[0]:
                if 'data' in pair[0]['file']:
                    # Legacy format with an embedded data URI.
                    data_uri = pair[0]['file']['data']
                    mime_type = data_uri.split(';')[0].split(':')[1]

                    if mime_type.startswith('image/'):
                        img = Image.open(io.BytesIO(_decode_data_uri(data_uri)))
                        chat_history.append({
                            "role": "user",
                            "content": gr.Image(value=img)
                        })
                    else:
                        # FIX: the legacy path previously joined the raw name,
                        # allowing path traversal; _write_sidecar_file applies
                        # basename() before writing.
                        fname = pair[0]['file'].get('name', 'download')
                        chat_history.append({
                            "role": "user",
                            "content": _write_sidecar_file(file, fname,
                                                           _decode_data_uri(data_uri))
                        })
                else:
                    # File reference without embedded data: keep as-is,
                    # converted to message format.
                    chat_history.append({
                        "role": "user",
                        "content": pair[0]
                    })
            else:
                chat_history.append({
                    "role": "user",
                    "content": pair[0]
                })

        if pair[1]:  # assistant message
            chat_history.append({
                "role": "assistant",
                "content": pair[1]
            })

    return chat_history, system_prompt_value
126
+
127
def get_export_js():
    """Return the JavaScript handler for the "History Export" button.

    The returned snippet is passed as the ``js=`` argument of the gradio
    click event. It receives ``(chat_history, system_prompt)``, converts the
    history to an OpenAI-style ``{"messages": [...]}`` document (embedding
    images/files as data URIs), and triggers a browser download of
    ``chat_history.json``. Runs entirely client-side.
    """
    return """
    async (chat_history, system_prompt) => {
        let messages = [];

        if (system_prompt) {
            messages.push({
                "role": "system",
                "content": system_prompt
            });
        }

        async function processFile(file_url) {
            const response = await fetch(file_url);
            const blob = await response.blob();
            return new Promise((resolve) => {
                const reader = new FileReader();
                reader.onloadend = () => resolve({
                    data: reader.result,
                    type: blob.type
                });
                reader.onerror = (error) => resolve(null);
                reader.readAsDataURL(blob);
            });
        }

        for (let message of chat_history) {
            if (!message.role || !message.content) continue;

            if (message.content && typeof message.content === 'object') {
                if (message.content.file) {
                    try {
                        const file_data = await processFile(message.content.file.url);
                        if (!file_data) continue;

                        if (file_data.type.startsWith('image/')) {
                            messages.push({
                                "role": message.role,
                                "content": [{
                                    "type": "image_url",
                                    "image_url": {
                                        "url": file_data.data
                                    }
                                }]
                            });
                        } else {
                            const fileLink = document.querySelector(`a[data-testid="chatbot-file"][download][href*="${message.content.file.url.split('/').pop()}"]`);
                            const fileName = fileLink ? fileLink.getAttribute('download') : (message.content.file.name || "download");

                            messages.push({
                                "role": message.role,
                                "content": [{
                                    "type": "file",
                                    "file": {
                                        "url": file_data.data,
                                        "name": fileName,
                                        "mime_type": file_data.type
                                    }
                                }]
                            });
                        }
                    } catch (error) {}
                }
            } else {
                messages.push({
                    "role": message.role,
                    "content": message.content
                });
            }
        }

        const export_data = { messages };
        const blob = new Blob([JSON.stringify(export_data)], {type: 'application/json'});
        const url = URL.createObjectURL(blob);
        const a = document.createElement('a');
        a.href = url;
        a.download = 'chat_history.json';
        document.body.appendChild(a);
        a.click();
        document.body.removeChild(a);
        URL.revokeObjectURL(url);
    }
    """
requirements.txt CHANGED
@@ -1,4 +1,4 @@
1
- gradio == 5.13.1
2
  openai == 1.60.1
3
  lxml
4
  PyMuPDF
 
1
+ gradio == 5.7.1
2
  openai == 1.60.1
3
  lxml
4
  PyMuPDF