Spaces:

awacke1
/

CodeCompetitionClaudeVsGPT

Running

App Files Files Community

awacke1 committed on Dec 24, 2024

Commit

3f8c47a

verified ·

1 Parent(s): cbd471e

Update app.py

Browse files

Files changed (1) hide show

app.py +200 -59

app.py CHANGED Viewed

@@ -33,6 +33,7 @@ st.set_page_config(
 )
 load_dotenv()
 USER_NAMES = [
     "Aria", "Guy", "Sonia", "Tony", "Jenny", "Davis", "Libby", "Clara", "Liam", "Natasha", "William"
 ]
@@ -45,6 +46,12 @@ ENGLISH_VOICES = [
 USER_VOICES = dict(zip(USER_NAMES, ENGLISH_VOICES))
 if 'user_name' not in st.session_state:
     st.session_state['user_name'] = USER_NAMES[0]
 if 'old_val' not in st.session_state:
@@ -53,14 +60,11 @@ if 'viewing_prefix' not in st.session_state:
     st.session_state['viewing_prefix'] = None
 if 'should_rerun' not in st.session_state:
     st.session_state['should_rerun'] = False
-FILE_EMOJIS = {
-    "md": "📝",
-    "mp3": "🎵",
-}
 def get_high_info_terms(text: str) -> list:
-    # Expanded stop words
     stop_words = set([
         'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with',
         'by', 'from', 'up', 'about', 'into', 'over', 'after', 'is', 'are', 'was', 'were',
@@ -71,7 +75,6 @@ def get_high_info_terms(text: str) -> list:
         'other', 'some', 'such', 'than', 'too', 'very', 'just', 'there', 'as', 'if', 'while'
     ])
-    # Key phrases tailored to your interests
     key_phrases = [
         'artificial intelligence', 'machine learning', 'deep learning', 'neural networks',
         'natural language processing', 'healthcare systems', 'clinical medicine',
@@ -81,16 +84,14 @@ def get_high_info_terms(text: str) -> list:
         'quantum mechanics', 'biomedical engineering', 'computational biology'
     ]
-    # Preserve key phrases and remove them from the text
     preserved_phrases = []
     lower_text = text.lower()
     for phrase in key_phrases:
         if phrase in lower_text:
             preserved_phrases.append(phrase)
             text = text.replace(phrase, '')
-            break  # Stop after the first matching key phrase
-    # Extract words and filter high-info terms
     words = re.findall(r'\b\w+(?:-\w+)*\b', text)
     high_info_words = [
         word.lower() for word in words
@@ -100,7 +101,6 @@ def get_high_info_terms(text: str) -> list:
         and any(c.isalpha() for c in word)
     ]
-    # Combine preserved phrases and filtered words, ensuring uniqueness
     unique_terms = []
     seen = set()
     for term in preserved_phrases + high_info_words:
@@ -108,7 +108,6 @@ def get_high_info_terms(text: str) -> list:
             seen.add(term)
             unique_terms.append(term)
-    # Return only the top 5 terms
     return unique_terms[:5]
 def clean_text_for_filename(text: str) -> str:
@@ -120,12 +119,9 @@ def clean_text_for_filename(text: str) -> str:
     return '_'.join(filtered)[:200]
 def generate_filename(prompt, response, file_type="md"):
-    # Adjust timezone to Central Time
     central_tz = pytz.timezone('America/Chicago')
     central_time = datetime.now(central_tz)
-    # Format the prefix to include the required format
-    prefix = central_time.strftime("%m-%d-%y_%I-%M-%p_")  # e.g., 12-20-24_11-34-AM_
     combined = (prompt + " " + response).strip()
     info_terms = get_high_info_terms(combined)
@@ -160,6 +156,7 @@ def clean_for_speech(text: str) -> str:
     text = re.sub(r"\s+", " ", text).strip()
     return text
 async def edge_tts_generate_audio(text, voice="en-US-AriaNeural", rate=0, pitch=0):
     text = clean_for_speech(text)
     if not text.strip():
@@ -184,6 +181,7 @@ def play_and_download_audio(file_path):
         dl_link = f'<a href="data:audio/mpeg;base64,{base64.b64encode(open(file_path,"rb").read()).decode()}" download="{os.path.basename(file_path)}">Download {os.path.basename(file_path)}</a>'
         st.markdown(dl_link, unsafe_allow_html=True)
 def load_files_for_sidebar():
     md_files = glob.glob("*.md")
     mp3_files = glob.glob("*.mp3")
@@ -213,6 +211,33 @@ def extract_keywords_from_md(files):
             text += " " + c
     return get_high_info_terms(text)
 def display_file_manager_sidebar(groups, sorted_prefixes):
     st.sidebar.title("🎵 Audio & Docs Manager")
@@ -263,40 +288,143 @@ def display_file_manager_sidebar(groups, sorted_prefixes):
                 ctime = datetime.fromtimestamp(os.path.getmtime(f)).strftime("%Y-%m-%d %H:%M:%S")
                 st.write(f"**{fname}** - {ctime}")
-def create_zip_of_files(md_files, mp3_files):
-    md_files = [f for f in md_files if os.path.basename(f).lower() != 'readme.md']
-    all_files = md_files + mp3_files
-    if not all_files:
         return None
-    all_content = []
-    for f in all_files:
-        if f.endswith('.md'):
-            with open(f,'r',encoding='utf-8') as file:
-                all_content.append(file.read())
-        elif f.endswith('.mp3'):
-            all_content.append(os.path.basename(f))
-    combined_content = " ".join(all_content)
-    info_terms = get_high_info_terms(combined_content)
-    timestamp = datetime.now().strftime("%y%m_%H%M")
-    name_text = '_'.join(term.replace(' ', '-') for term in info_terms[:3])
-    zip_name = f"{timestamp}_{name_text}.zip"
-    with zipfile.ZipFile(zip_name,'w') as z:
-        for f in all_files:
-            z.write(f)
-    return zip_name
-def perform_ai_lookup(q, vocal_summary=True, extended_refs=False, titles_summary=True, full_audio=False):
-    """Perform Arxiv search (via your RAG pattern) and generate audio summaries."""
     start = time.time()
     client = Client("awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern")
-    # The next lines call your RAG pipeline
-    refs = client.predict(q,20,"Semantic Search","mistralai/Mixtral-8x7B-Instruct-v0.1",api_name="/update_with_rag_md")[0]
-    r2 = client.predict(q,"mistralai/Mixtral-8x7B-Instruct-v0.1",True,api_name="/ask_llm")
     result = f"### 🔎 {q}\n\n{r2}\n\n{refs}"
@@ -340,16 +468,22 @@ def perform_ai_lookup(q, vocal_summary=True, extended_refs=False, titles_summary
                     st.write("### 🔖 Titles")
                     play_and_download_audio(audio_file_titles)
-    # show text last after playback interfaces.   For the big one lets add a feature later that breaks into their own.
     st.markdown(result)
     elapsed = time.time()-start
     st.write(f"**Total Elapsed:** {elapsed:.2f} s")
-    create_file(q, result, "md")
-    return result
 def main():
     st.session_state['user_name'] = st.selectbox("Current User:", USER_NAMES, index=0)
@@ -398,15 +532,18 @@ def main():
                 # Save user input
                 create_file(st.session_state['user_name'], voice_text, "md")
-                # Perform ArXiv search automatically
-                with st.spinner("Searching ArXiv..."):
-                    # Always do vocal_summary = True, extended_refs=False, titles_summary=True, full_audio=False
-                    result = perform_ai_lookup(voice_text, vocal_summary=True, extended_refs=False, titles_summary=True, full_audio=False)
-                # Update old_val
                 st.session_state['old_val'] = voice_text
-                # Clear the text by rerunning
-                #st.rerun()
         st.write("Speak a query to run an ArXiv search and hear the results.")
@@ -453,11 +590,15 @@ def main():
     with tabs[2]:
         st.subheader("⚙️ Settings")
-        st.write("Currently no additional settings.")
     if st.session_state.should_rerun:
         st.session_state.should_rerun = False
         st.rerun()
-if __name__=="__main__":
-    main()

 )
 load_dotenv()
+# -------------------- Constants --------------------
 USER_NAMES = [
     "Aria", "Guy", "Sonia", "Tony", "Jenny", "Davis", "Libby", "Clara", "Liam", "Natasha", "William"
 ]
 USER_VOICES = dict(zip(USER_NAMES, ENGLISH_VOICES))
+FILE_EMOJIS = {
+    "md": "📝",
+    "mp3": "🎵",
+}
+# -------------------- Session State Initialization --------------------
 if 'user_name' not in st.session_state:
     st.session_state['user_name'] = USER_NAMES[0]
 if 'old_val' not in st.session_state:
     st.session_state['viewing_prefix'] = None
 if 'should_rerun' not in st.session_state:
     st.session_state['should_rerun'] = False
+if 'use_streaming' not in st.session_state:
+    st.session_state['use_streaming'] = True
+# -------------------- Helper Functions --------------------
 def get_high_info_terms(text: str) -> list:
     stop_words = set([
         'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with',
         'by', 'from', 'up', 'about', 'into', 'over', 'after', 'is', 'are', 'was', 'were',
         'other', 'some', 'such', 'than', 'too', 'very', 'just', 'there', 'as', 'if', 'while'
     ])
     key_phrases = [
         'artificial intelligence', 'machine learning', 'deep learning', 'neural networks',
         'natural language processing', 'healthcare systems', 'clinical medicine',
         'quantum mechanics', 'biomedical engineering', 'computational biology'
     ]
     preserved_phrases = []
     lower_text = text.lower()
     for phrase in key_phrases:
         if phrase in lower_text:
             preserved_phrases.append(phrase)
             text = text.replace(phrase, '')
+            break
     words = re.findall(r'\b\w+(?:-\w+)*\b', text)
     high_info_words = [
         word.lower() for word in words
         and any(c.isalpha() for c in word)
     ]
     unique_terms = []
     seen = set()
     for term in preserved_phrases + high_info_words:
             seen.add(term)
             unique_terms.append(term)
     return unique_terms[:5]
 def clean_text_for_filename(text: str) -> str:
     return '_'.join(filtered)[:200]
 def generate_filename(prompt, response, file_type="md"):
     central_tz = pytz.timezone('America/Chicago')
     central_time = datetime.now(central_tz)
+    prefix = central_time.strftime("%m-%d-%y_%I-%M-%p_")
     combined = (prompt + " " + response).strip()
     info_terms = get_high_info_terms(combined)
     text = re.sub(r"\s+", " ", text).strip()
     return text
+# -------------------- Audio Functions --------------------
 async def edge_tts_generate_audio(text, voice="en-US-AriaNeural", rate=0, pitch=0):
     text = clean_for_speech(text)
     if not text.strip():
         dl_link = f'<a href="data:audio/mpeg;base64,{base64.b64encode(open(file_path,"rb").read()).decode()}" download="{os.path.basename(file_path)}">Download {os.path.basename(file_path)}</a>'
         st.markdown(dl_link, unsafe_allow_html=True)
+# -------------------- File Management Functions --------------------
 def load_files_for_sidebar():
     md_files = glob.glob("*.md")
     mp3_files = glob.glob("*.mp3")
             text += " " + c
     return get_high_info_terms(text)
+def create_zip_of_files(md_files, mp3_files):
+    md_files = [f for f in md_files if os.path.basename(f).lower() != 'readme.md']
+    all_files = md_files + mp3_files
+    if not all_files:
+        return None
+    all_content = []
+    for f in all_files:
+        if f.endswith('.md'):
+            with open(f,'r',encoding='utf-8') as file:
+                all_content.append(file.read())
+        elif f.endswith('.mp3'):
+            all_content.append(os.path.basename(f))
+    combined_content = " ".join(all_content)
+    info_terms = get_high_info_terms(combined_content)
+    timestamp = datetime.now().strftime("%y%m_%H%M")
+    name_text = '_'.join(term.replace(' ', '-') for term in info_terms[:3])
+    zip_name = f"{timestamp}_{name_text}.zip"
+    with zipfile.ZipFile(zip_name,'w') as z:
+        for f in all_files:
+            z.write(f)
+    return zip_name
 def display_file_manager_sidebar(groups, sorted_prefixes):
     st.sidebar.title("🎵 Audio & Docs Manager")
                 ctime = datetime.fromtimestamp(os.path.getmtime(f)).strftime("%Y-%m-%d %H:%M:%S")
                 st.write(f"**{fname}** - {ctime}")
+# -------------------- xAI API Functions --------------------
+def call_xai_api_batch(query: str) -> dict:
+    """
+    Call the xAI API in batch mode for complete responses.
+    """
+    headers = {
+        "Content-Type": "application/json",
+        "Authorization": f"Bearer {os.environ.get('xai')}"
+    }
+    data = {
+        "messages": [
+            {
+                "role": "system",
+                "content": "You are a helpful scientific research assistant. Analyze the following research query and provide initial insights."
+            },
+            {
+                "role": "user",
+                "content": query
+            }
+        ],
+        "model": "grok-2-1212",
+        "stream": False,
+        "temperature": 0.7
+    }
+    try:
+        response = requests.post(
+            "https://api.x.ai/v1/chat/completions",
+            headers=headers,
+            json=data,
+            timeout=30
+        )
+        response.raise_for_status()
+        return response.json()
+    except requests.exceptions.RequestException as e:
+        st.error(f"Error in batch xAI API call: {str(e)}")
         return None
+def stream_xai_response(query: str, placeholder) -> str:
+    """
+    Stream the xAI API response and display it in real-time.
+    Returns the complete response text.
+    """
+    headers = {
+        "Content-Type": "application/json",
+        "Authorization": f"Bearer {os.environ.get('xai')}"
+    }
+    data = {
+        "messages": [
+            {
+                "role": "system",
+                "content": "You are a helpful scientific research assistant. Analyze the following research query and provide initial insights."
+            },
+            {
+                "role": "user",
+                "content": query
+            }
+        ],
+        "model": "grok-2-1212",
+        "stream": True,
+        "temperature": 0.7
+    }
+    try:
+        response = requests.post(
+            "https://api.x.ai/v1/chat/completions",
+            headers=headers,
+            json=data,
+            stream=True,
+            timeout=30
+        )
+        response.raise_for_status()
+        full_response = ""
+        for line in response.iter_lines():
+            if line:
+                line = line.decode('utf-8')
+                if line.startswith('data: '):
+                    json_str = line[6:]  # Remove 'data: ' prefix
+                    if json_str == '[DONE]':
+                        break
+                    try:
+                        chunk = json.loads(json_str)
+                        if chunk["choices"][0]["delta"].get("content"):
+                            content = chunk["choices"][0]["delta"]["content"]
+                            full_response += content
+                            # Update the placeholder with accumulated text
+                            placeholder.markdown(full_response + "▌")
+                    except json.JSONDecodeError:
+                        continue
+        # Final update without the cursor
+        placeholder.markdown(full_response)
+        return full_response
+    except requests.exceptions.RequestException as e:
+        st.error(f"Error in streaming xAI API call: {str(e)}")
+        return None
+# -------------------- Main AI Lookup Function --------------------
+def perform_ai_lookup(q, vocal_summary=True, extended_refs=False, titles_summary=True, full_audio=False, use_streaming=True):
+    """Perform Arxiv search with initial xAI insights."""
     start = time.time()
+    # First, get xAI insights
+    st.write("### 🤖 Initial AI Insights")
+    initial_insights = None
+    if use_streaming:
+        # Create a placeholder for streaming text
+        streaming_placeholder = st.empty()
+        with st.spinner("Getting streaming AI insights..."):
+            initial_insights = stream_xai_response(q, streaming_placeholder)
+    else:
+        with st.spinner("Getting batch AI insights..."):
+            xai_response = call_xai_api_batch(q)
+            if xai_response and 'choices' in xai_response:
+                initial_insights = xai_response['choices'][0]['message']['content']
+                st.markdown(initial_insights)
+    # Generate audio for xAI insights if enabled
+    if vocal_summary and initial_insights:
+        insights_text = clean_for_speech(initial_insights)
+        if insights_text.strip():
+            audio_file_insights = speak_with_edge_tts(insights_text)
+            if audio_file_insights:
+                st.write("### 🎤 AI Insights Audio")
+                play_and_download_audio(audio_file_insights)
+    # Proceed with existing ArXiv search
+    st.write("### 📚 ArXiv Results")
     client = Client("awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern")
+    refs = client.predict(q, 20, "Semantic Search", "mistralai/Mixtral-8x7B-Instruct-v0.1", api_name="/update_with_rag_md")[0]
+    r2 = client.predict(q, "mistralai/Mixtral-8x7B-Instruct-v0.1", True, api_name="/ask_llm")
     result = f"### 🔎 {q}\n\n{r2}\n\n{refs}"
                     st.write("### 🔖 Titles")
                     play_and_download_audio(audio_file_titles)
     st.markdown(result)
+    # Save complete results including xAI insights
+    if initial_insights:
+        full_result = f"### 🤖 Initial AI Insights\n\n{initial_insights}\n\n{result}"
+    else:
+        full_result = result
+    create_file(q, full_result, "md")
     elapsed = time.time()-start
     st.write(f"**Total Elapsed:** {elapsed:.2f} s")
+    return full_result
+# -------------------- Main Application --------------------
 def main():
     st.session_state['user_name'] = st.selectbox("Current User:", USER_NAMES, index=0)
                 # Save user input
                 create_file(st.session_state['user_name'], voice_text, "md")
+                # Perform AI lookup with current streaming setting
+                with st.spinner("Processing..."):
+                    result = perform_ai_lookup(
+                        voice_text,
+                        vocal_summary=True,
+                        extended_refs=False,
+                        titles_summary=True,
+                        full_audio=False,
+                        use_streaming=st.session_state['use_streaming']
+                    )
                 st.session_state['old_val'] = voice_text
         st.write("Speak a query to run an ArXiv search and hear the results.")
     with tabs[2]:
         st.subheader("⚙️ Settings")
+        st.session_state['use_streaming'] = st.toggle(
+            "Use streaming responses",
+            value=st.session_state['use_streaming'],
+            help="Enable to see AI responses as they are generated in real-time"
+        )
     if st.session_state.should_rerun:
         st.session_state.should_rerun = False
         st.rerun()
+if __name__ == "__main__":
+    main()