|
import streamlit as st |
|
import pandas as pd |
|
from io import BytesIO |
|
|
|
def format_datetime_columns(df): |
|
|
|
if 'Fragtbrevsdato' in df.columns: |
|
df['Fragtbrevsdato'] = pd.to_datetime(df['Fragtbrevsdato'], errors='coerce').dt.strftime('%Y-%m-%d') |
|
if 'Fakturadato' in df.columns: |
|
df['Fakturadato'] = pd.to_datetime(df['Fakturadato'], errors='coerce').dt.strftime('%Y-%m-%d') |
|
if 'Leveringstidspunkt' in df.columns: |
|
df['Leveringstidspunkt'] = pd.to_datetime(df['Leveringstidspunkt'], errors='coerce').dt.strftime('%d-%m-%Y %H:%M:%S') |
|
return df |
|
|
|
def split_excel(file, columns, original_file_name, sheet_name="Linjer", header_row=3): |
|
try: |
|
|
|
full_df = pd.read_excel(file, sheet_name=sheet_name, header=None) |
|
metadata_rows = full_df.iloc[:2].values.tolist() |
|
column_headers = full_df.iloc[header_row-1].tolist() |
|
df = pd.read_excel(file, sheet_name=sheet_name, header=header_row-1) |
|
log_messages = [f"Loaded worksheet '{sheet_name}'. Other worksheets were ignored."] |
|
except ValueError: |
|
return {}, [f"Worksheet '{sheet_name}' not found. Please upload a file with the correct structure."] |
|
except Exception as e: |
|
return {}, [f"An error occurred while reading the file: {e}"] |
|
|
|
|
|
df = format_datetime_columns(df) |
|
|
|
split_files = {} |
|
all_unique_values = set() |
|
|
|
|
|
for col in columns: |
|
if col not in df.columns: |
|
log_messages.append(f"Column '{col}' not found in the worksheet. Please check the file.") |
|
return {}, log_messages |
|
|
|
|
|
for col in columns: |
|
unique_values = df[col].dropna().unique() |
|
log_messages.append(f"Found unique values in column '{col}': {list(unique_values)}") |
|
all_unique_values.update(unique_values) |
|
|
|
log_messages.append(f"Total unique values across columns: {list(all_unique_values)}") |
|
|
|
|
|
for value in all_unique_values: |
|
split_df = df[(df[columns[0]] == value) | (df[columns[1]] == value)] |
|
if split_df.empty: |
|
log_messages.append(f"No rows found for value '{value}'. Skipping file creation.") |
|
continue |
|
|
|
|
|
empty_rows = pd.DataFrame([[''] * len(column_headers)] * 3, columns=column_headers) |
|
empty_rows.iloc[0] = metadata_rows[0] |
|
empty_rows.iloc[1] = metadata_rows[1] |
|
empty_rows.iloc[2] = column_headers |
|
|
|
|
|
split_with_metadata = pd.concat([empty_rows, split_df], ignore_index=True) |
|
|
|
sanitized_value = str(value).replace("/", "-").replace("\\", "-") |
|
file_name = f"{original_file_name} - {sanitized_value}.xlsx" |
|
|
|
output = BytesIO() |
|
with pd.ExcelWriter(output, engine='xlsxwriter') as writer: |
|
split_with_metadata.to_excel(writer, index=False, header=False, sheet_name='Sheet1') |
|
output.seek(0) |
|
split_files[file_name] = output |
|
log_messages.append(f"File '{file_name}' created with {len(split_df)} rows (plus metadata and headers).") |
|
|
|
return split_files, log_messages |
|
|
|
st.title("BG File Splitter") |
|
st.write("Upload an Excel file to split it based on unique values in specified columns.") |
|
|
|
uploaded_file = st.file_uploader("Upload Excel File", type=["xlsx"]) |
|
|
|
if uploaded_file: |
|
st.write("Processing the uploaded file...") |
|
original_file_name = uploaded_file.name.rsplit(".", 1)[0] |
|
columns_to_split = ['Afsenderkunde', 'Modtagerkunde'] |
|
|
|
split_files, logs = split_excel(uploaded_file, columns_to_split, original_file_name) |
|
|
|
st.write("### Logs") |
|
if logs: |
|
for log in logs: |
|
st.write(log) |
|
else: |
|
st.write("No logs available. Something might have gone wrong.") |
|
|
|
if split_files: |
|
st.write("### Download Split Files") |
|
for file_name, file_content in split_files.items(): |
|
st.download_button( |
|
label=f"Download {file_name}", |
|
data=file_content, |
|
file_name=file_name, |
|
mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" |
|
) |
|
else: |
|
st.write("No files were created. Check the logs for details.") |
|
|