import streamlit as st
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import transforms
from PIL import Image
from huggingface_hub import hf_hub_download
import os

# Read the Hugging Face access token from the environment.
# os.getenv never raises -- it returns None when the variable is unset -- so a
# try/except around it can never detect a missing token. Check the value
# explicitly instead. An empty string is normalised to None so that a blank
# credential is never forwarded to the Hub client.
hf_token = os.getenv("HF_TOKEN") or None
if hf_token is None:
    # Warn rather than stop: the app previously kept running without a token,
    # so keep that behaviour and just make the situation visible.
    st.warning(
        "No Hugging Face Token Found: HF_TOKEN is not set; "
        "continuing without an explicit token."
    )

# Define your model architecture
class SimpleNN(nn.Module):
    """Fully connected classifier: flatten, two 512-unit ReLU layers, linear output.

    Args:
        input_size: Number of features per sample once the input is flattened.
        n_classes: Number of output scores produced per sample.
    """

    def __init__(self, input_size, n_classes):
        super().__init__()
        hidden_units = 512
        # The position of each layer determines its index inside the
        # Sequential container (and so its state-dict key); keep this order.
        layers = [
            nn.Flatten(),
            nn.Linear(input_size, hidden_units),
            nn.ReLU(),
            nn.Linear(hidden_units, hidden_units),
            nn.ReLU(),
            nn.Linear(hidden_units, n_classes),
        ]
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        """Return the raw class scores (logits) for the batch ``x``."""
        logits = self.model(x)
        return logits

@st.cache_resource()
def load_model(hf_token):
    """Download the trained weights from the Hugging Face Hub and build the classifier.

    Wrapped in ``st.cache_resource`` so the download and model construction
    are not repeated on every Streamlit rerun.

    Args:
        hf_token: Token forwarded to ``hf_hub_download`` for authentication.

    Returns:
        A ``SimpleNN`` instance with the downloaded state dict loaded onto the
        CPU and switched to evaluation mode.
    """
    model_path = hf_hub_download(
        repo_id="louiecerv/cats_dogs_recognition_torch_nn",
        filename="cats_dogs_classifier.pth",
        token=hf_token,  # Pass the token for authentication
    )
    input_size = 128 * 128 * 3  # 128x128 pixels with 3 colour channels
    n_classes = 2  # Cat and Dog
    model = SimpleNN(input_size, n_classes)
    # The checkpoint is a downloaded file. weights_only=True restricts
    # unpickling to tensors and plain containers, so a tampered file cannot
    # execute arbitrary code while being loaded.
    state_dict = torch.load(
        model_path,
        map_location=torch.device("cpu"),
        weights_only=True,
    )
    model.load_state_dict(state_dict)
    model.eval()
    return model


# Build the classifier once at import time; load_model is wrapped in
# st.cache_resource, so later reruns reuse the cached instance.
model = load_model(hf_token)

# Preprocessing applied to every uploaded image before inference:
# resize to 128x128 (the size the network's input layer is built for),
# then convert the PIL image to a tensor.
transform = transforms.Compose(
    [
        transforms.Resize(size=(128, 128)),
        transforms.ToTensor(),
    ]
)

def main():
    """Render the Streamlit page: description, image upload, and prediction.

    Reads the module-level ``model`` and ``transform``. When a file has been
    uploaded it is decoded, forced to three RGB channels, preprocessed,
    displayed, and classified; the predicted label and its softmax
    probability are written to the page.
    """
    # Streamlit UI
    st.title("Cats vs Dogs Classifier")

    about_text = """
### About This Model

**SimpleNN** is a basic Multi-Layer Perceptron (MLP) model with the following architecture:

- **Input Layer**: Flattens input and maps to 512 neurons.
- **Hidden Layer**: 512 neurons with ReLU activation.
- **Output Layer**: Maps to the number of output classes.

**Limitations**:
- Limited scalability and expressiveness.
- Prone to overfitting on small datasets.
- Less efficient on modern hardware.

**Foundations of Deep Learning**:
- ANNs use layered structures to learn data representations.
- Essential for tasks like classification and regression.
- Basis for deep learning with hierarchical representations.
"""
    with st.expander("ℹ️ About"):
        st.markdown(about_text)

    st.write("Upload an image, and the model will predict whether it's a cat or a dog.")

    uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "png", "jpeg"])

    if uploaded_file is None:
        return

    try:
        # Force three channels: PNGs may be RGBA, palette, or grayscale, which
        # would otherwise yield 4 or 1 channels and no longer match the
        # 128 * 128 * 3 features the first linear layer expects.
        pil_image = Image.open(uploaded_file).convert("RGB")
    except OSError:
        # Pillow raises OSError (or a subclass) for unreadable/corrupt files.
        st.error("The uploaded file could not be read as an image.")
        return

    # Preprocess image
    batch = transform(pil_image).unsqueeze(0)  # Add batch dimension

    # Display preprocessed image (channels-last layout for st.image)
    st.image(
        batch.squeeze(0).permute(1, 2, 0).numpy(),
        caption="Preprocessed Image",
        use_container_width=True,
    )

    # Make prediction; no gradients are needed for inference
    with torch.no_grad():
        output = model(batch)
        probabilities = F.softmax(output, dim=1)
        prediction = torch.argmax(probabilities, dim=1).item()

    labels = ["Cat", "Dog"]
    st.write(f"Prediction: {labels[prediction]} ({probabilities[0][prediction].item() * 100:.2f}% confidence)")

# Only render the UI when this file is executed as the main script.
if __name__ == "__main__":
    main()