import streamlit as st
from transformers import AutoModel, AutoTokenizer
import torch

# Hugging Face Hub identifier of the pretrained checkpoint to load.
model_name = "emilyalsentzer/Bio_ClinicalBERT"


@st.cache_resource
def _load_tokenizer_and_model(name):
    """Load the tokenizer and model for *name* once per server process.

    Streamlit re-executes this script from top to bottom on every widget
    interaction. Without caching, both ``from_pretrained`` calls would run
    again on each rerun; ``st.cache_resource`` keeps the loaded objects alive
    and hands back the same instances on subsequent reruns.

    Args:
        name: Model identifier passed to ``from_pretrained``.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    loaded_tokenizer = AutoTokenizer.from_pretrained(name)
    loaded_model = AutoModel.from_pretrained(name)
    return loaded_tokenizer, loaded_model


# Module-level names are kept so the rest of the script can use them as before.
tokenizer, model = _load_tokenizer_and_model(model_name)

# Streamlit app UI
st.title("Medical Text Analysis with ClinicalBERT")
st.write("Type in a medical text input to get the CLS token embedding.")

# User input
text = st.text_input("Enter Medical Text")

if st.button("Predict"):
    # Ignore whitespace-only submissions.
    if text.strip():
        # Tokenize into PyTorch tensors, truncating to at most 512 tokens.
        inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512)
        # Inference only: disabling autograd avoids building a gradient graph
        # that would just be thrown away, which saves memory and time.
        with torch.no_grad():
            outputs = model(**inputs)
        # Index 0 along the sequence axis is the first token, which the UI
        # reports as the CLS embedding. No .detach() is needed because the
        # output was produced under no_grad.
        cls_embedding = outputs.last_hidden_state[:, 0, :].numpy()
        st.write(f"CLS Embedding (first 5 values): {cls_embedding[0][:5]}")
    else:
        st.write("Please enter some text.")