import streamlit as st import pandas as pd from io import BytesIO def split_excel(file, columns, original_file_name, sheet_name="Linjer", header_row=3): try: # Load only the "Linjer" sheet df = pd.read_excel(file, sheet_name=sheet_name, header=header_row-1) # Adjust for zero-based index log_messages = [f"Loaded worksheet '{sheet_name}'. Other worksheets were ignored."] except ValueError: return {}, [f"Worksheet '{sheet_name}' not found. Please upload a file with the correct structure."] except Exception as e: return {}, [f"An error occurred while reading the file: {e}"] split_files = {} all_unique_values = set() # Validate that both columns exist in the dataframe for col in columns: if col not in df.columns: log_messages.append(f"Column '{col}' not found in the worksheet. Please check the file.") return {}, log_messages # Collect all unique values across both columns for col in columns: unique_values = df[col].dropna().unique() log_messages.append(f"Found unique values in column '{col}': {list(unique_values)}") all_unique_values.update(unique_values) log_messages.append(f"Total unique values across columns: {list(all_unique_values)}") # Create files for each unique value for value in all_unique_values: split_df = df[(df[columns[0]] == value) | (df[columns[1]] == value)] if split_df.empty: log_messages.append(f"No rows found for value '{value}'. Skipping file creation.") continue sanitized_value = str(value).replace("/", "-").replace("\\", "-") file_name = f"{original_file_name} - {sanitized_value}.xlsx" output = BytesIO() with pd.ExcelWriter(output, engine='xlsxwriter') as writer: split_df.to_excel(writer, index=False, sheet_name='Sheet1') output.seek(0) split_files[file_name] = output log_messages.append(f"File '{file_name}' created with {len(split_df)} rows.") return split_files, log_messages st.title("BG File Splitter") st.write("Upload an Excel file to split it based on unique values in specified columns.") uploaded_file = st.file_uploader("Upload Excel File", type=["xlsx"]) if uploaded_file: st.write("Processing the uploaded file...") original_file_name = uploaded_file.name.rsplit(".", 1)[0] # Extract the file name without extension columns_to_split = ['Afsenderkunde', 'Modtagerkunde'] # Column names to check for unique values split_files, logs = split_excel(uploaded_file, columns_to_split, original_file_name) st.write("### Logs") if logs: for log in logs: st.write(log) else: st.write("No logs available. Something might have gone wrong.") if split_files: st.write("### Download Split Files") for file_name, file_content in split_files.items(): st.download_button( label=f"Download {file_name}", data=file_content, file_name=file_name, mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" ) else: st.write("No files were created. Check the logs for details.")