ReithBjarkan committed on
Commit
d32067c
·
1 Parent(s): bf8026d

Initial upload of Streamlit app for keyword similarity

Browse files
Files changed (2) hide show
  1. app.py +71 -0
  2. requirements.txt +5 -0
app.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Streamlit app that scores keywords by cosine similarity to a primary keyword.

The user enters a primary keyword, a newline-separated list of keywords and
an embedding backend (a SentenceTransformer model or the OpenAI embeddings
API). On button press every text is embedded and the cosine similarity of
each keyword to the primary keyword is shown in a table.
"""

import streamlit as st
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import openai
import numpy as np

# Label shown in the model selector for the OpenAI backend.
OPENAI_MODEL_LABEL = "OpenAI Embeddings"
# Embedding model requested from the OpenAI API.
OPENAI_EMBEDDING_MODEL = "text-embedding-ada-002"
# Models loaded locally through sentence-transformers.
SENTENCE_TRANSFORMER_MODELS = ["sentence-transformers/LaBSE", "all-MiniLM-L6-v2"]
# Number of texts sent per OpenAI request, to keep each request small.
OPENAI_BATCH_SIZE = 256


@st.cache_resource(show_spinner=False)
def _load_sentence_transformer(name: str) -> SentenceTransformer:
    """Load a SentenceTransformer model once and reuse it across reruns."""
    return SentenceTransformer(name)


def _parse_keywords(raw_text: str) -> list:
    """Split newline-separated text into stripped, non-empty keywords."""
    return [line.strip() for line in raw_text.split("\n") if line.strip()]


def _embed_with_sentence_transformer(name: str, texts: list) -> np.ndarray:
    """Embed ``texts`` with the named SentenceTransformer model.

    The encoder's default NumPy output is used (instead of torch tensors) so
    the result can be handed to scikit-learn regardless of the device the
    model runs on.
    """
    model = _load_sentence_transformer(name)
    return np.asarray(model.encode(texts))


def _embed_with_openai(api_key: str, texts: list) -> np.ndarray:
    """Embed ``texts`` with the OpenAI embeddings API, one row per text.

    Works with both the openai>=1.0 client interface and the legacy
    ``openai.Embedding`` interface, whichever the installed package offers.
    Texts are sent in batches rather than one request per text.
    """
    use_v1_client = hasattr(openai, "OpenAI")
    if use_v1_client:
        # A per-call client avoids storing the key in module-global state.
        client = openai.OpenAI(api_key=api_key)
    else:
        openai.api_key = api_key

    vectors = []
    for start in range(0, len(texts), OPENAI_BATCH_SIZE):
        batch = texts[start:start + OPENAI_BATCH_SIZE]
        if use_v1_client:
            response = client.embeddings.create(
                model=OPENAI_EMBEDDING_MODEL,
                input=batch,
            )
            # Order by the reported index so rows line up with ``batch``.
            items = sorted(response.data, key=lambda item: item.index)
            vectors.extend(item.embedding for item in items)
        else:
            response = openai.Embedding.create(
                model=OPENAI_EMBEDDING_MODEL,
                input=batch,
            )
            items = sorted(response["data"], key=lambda item: item["index"])
            vectors.extend(item["embedding"] for item in items)
    return np.array(vectors)


def _render_similarities(primary_text: str, keyword_list: list,
                         selected_model: str, api_key: str) -> None:
    """Embed the inputs, compute similarities and render the results.

    Returns early (after showing an error) when the OpenAI backend is chosen
    without an API key or the model selection is not recognised, so no later
    step runs with missing embeddings.
    """
    # Embed the primary keyword together with the keywords in one pass;
    # row 0 is the primary keyword, the remaining rows are the keywords.
    texts = [primary_text] + keyword_list

    if selected_model == OPENAI_MODEL_LABEL:
        if not api_key:
            st.error("Please provide your OpenAI API key for this model.")
            return
        st.info("Generating OpenAI embeddings...")
        embeddings = _embed_with_openai(api_key, texts)
    elif selected_model in SENTENCE_TRANSFORMER_MODELS:
        st.info(f"Loading model: {selected_model}")
        st.info("Generating embeddings...")
        embeddings = _embed_with_sentence_transformer(selected_model, texts)
    else:
        st.error("Invalid model selection.")
        return

    primary_embedding = embeddings[0]
    keyword_embeddings = embeddings[1:]

    # Calculate cosine similarities
    st.info("Calculating cosine similarities...")
    similarities = cosine_similarity(
        primary_embedding.reshape(1, -1), keyword_embeddings
    )[0]

    # Display results
    st.header("Results")
    results = [
        {"Keyword": kw, "Cosine Similarity": float(sim)}
        for kw, sim in zip(keyword_list, similarities)
    ]
    st.table(results)

    # Debugging/Intermediate Data
    st.header("Debugging Info")
    st.write("Primary Embedding:", primary_embedding)
    st.write("Keyword Embeddings:", keyword_embeddings)


# App title
st.title("Keyword Cosine Similarity Tool")

# Inputs
st.header("Input Parameters")
primary_keyword = st.text_input("Primary Keyword", placeholder="Enter your primary keyword")
keywords = st.text_area("Keywords to Compare", placeholder="Enter keywords separated by new lines")
model_name = st.selectbox(
    "Select Embedding Model",
    SENTENCE_TRANSFORMER_MODELS + [OPENAI_MODEL_LABEL],
)
openai_api_key = st.text_input("OpenAI API Key (optional)", type="password")

# Process Button
if st.button("Calculate Similarities"):
    primary_text = primary_keyword.strip()
    keyword_list = _parse_keywords(keywords)

    # Validate the cleaned values so whitespace-only input is rejected too.
    if not primary_text or not keyword_list:
        st.error("Please provide both the primary keyword and keywords to compare.")
    else:
        _render_similarities(
            primary_text, keyword_list, model_name, openai_api_key.strip()
        )

# Footer
st.markdown("---")
st.markdown("Created by [Your Name](https://huggingface.co/yourprofile)")
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ streamlit
2
+ sentence-transformers
3
+ scikit-learn
4
+ openai
5
+ numpy