import streamlit as st
import pandas as pd
from io import BytesIO

def split_excel(file, columns, original_file_name, sheet_name="Linjer", header_row=3):
    try:
        # Load only the "Linjer" sheet
        df = pd.read_excel(file, sheet_name=sheet_name, header=header_row-1)  # Adjust for zero-based index
        log_messages = [f"Loaded worksheet '{sheet_name}'. Other worksheets were ignored."]
    except ValueError:
        return {}, [f"Worksheet '{sheet_name}' not found. Please upload a file with the correct structure."]
    except Exception as e:
        return {}, [f"An error occurred while reading the file: {e}"]

    split_files = {}
    all_unique_values = set()

    # Validate that both columns exist in the dataframe
    for col in columns:
        if col not in df.columns:
            log_messages.append(f"Column '{col}' not found in the worksheet. Please check the file.")
            return {}, log_messages

    # Collect all unique values across both columns
    for col in columns:
        unique_values = df[col].dropna().unique()
        log_messages.append(f"Found unique values in column '{col}': {list(unique_values)}")
        all_unique_values.update(unique_values)

    log_messages.append(f"Total unique values across columns: {list(all_unique_values)}")

    # Create files for each unique value
    for value in all_unique_values:
        split_df = df[(df[columns[0]] == value) | (df[columns[1]] == value)]
        if split_df.empty:
            log_messages.append(f"No rows found for value '{value}'. Skipping file creation.")
            continue

        sanitized_value = str(value).replace("/", "-").replace("\\", "-")
        file_name = f"{original_file_name} - {sanitized_value}.xlsx"

        output = BytesIO()
        with pd.ExcelWriter(output, engine='xlsxwriter') as writer:
            split_df.to_excel(writer, index=False, sheet_name='Sheet1')
        output.seek(0)
        split_files[file_name] = output
        log_messages.append(f"File '{file_name}' created with {len(split_df)} rows.")
    
    return split_files, log_messages

st.title("BG File Splitter")
st.write("Upload an Excel file to split it based on unique values in specified columns.")

uploaded_file = st.file_uploader("Upload Excel File", type=["xlsx"])

if uploaded_file:
    st.write("Processing the uploaded file...")
    original_file_name = uploaded_file.name.rsplit(".", 1)[0]  # Extract the file name without extension
    columns_to_split = ['Afsenderkunde', 'Modtagerkunde']  # Column names to check for unique values

    split_files, logs = split_excel(uploaded_file, columns_to_split, original_file_name)

    st.write("### Logs")
    if logs:
        for log in logs:
            st.write(log)
    else:
        st.write("No logs available. Something might have gone wrong.")

    if split_files:
        st.write("### Download Split Files")
        for file_name, file_content in split_files.items():
            st.download_button(
                label=f"Download {file_name}",
                data=file_content,
                file_name=file_name,
                mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
            )
    else:
        st.write("No files were created. Check the logs for details.")