"""Streamlit app that displays the ClinicalBERT CLS-token embedding of a text.

The user types a medical text, presses "Predict", and the first five values
of the embedding at sequence position 0 of the model's last hidden state are
shown.
"""

import streamlit as st
import torch
from transformers import AutoModel, AutoTokenizer

# The model name (Hugging Face Hub checkpoint identifier).
model_name = "emilyalsentzer/Bio_ClinicalBERT"


@st.cache_resource
def _load_tokenizer_and_model(name):
    """Load the tokenizer and model for *name* once per server process.

    Streamlit re-runs this whole script on every widget interaction; without
    caching, the checkpoint would be re-loaded on each rerun.

    Args:
        name: Checkpoint identifier passed to ``from_pretrained``.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    return AutoTokenizer.from_pretrained(name), AutoModel.from_pretrained(name)


# Load the tokenizer and model (cached across reruns).
tokenizer, model = _load_tokenizer_and_model(model_name)

# Streamlit app UI
st.title("Medical Text Analysis with ClinicalBERT")
st.write("Type in a medical text input to get the CLS token embedding.")

# User input
text = st.text_input("Enter Medical Text")

if st.button("Predict"):
    if text.strip():
        # Inputs longer than 512 tokens are truncated rather than rejected.
        inputs = tokenizer(
            text,
            return_tensors="pt",
            truncation=True,
            padding=True,
            max_length=512,
        )
        # Inference only: skip building the autograd graph.
        with torch.no_grad():
            outputs = model(**inputs)
        # Position 0 along the sequence axis is the CLS token.
        cls_embedding = outputs.last_hidden_state[:, 0, :].detach().numpy()
        st.write(f"CLS Embedding (first 5 values): {cls_embedding[0][:5]}")
    else:
        # Whitespace-only input is treated the same as no input.
        st.write("Please enter some text.")