import streamlit as st
import pandas as pd
from io import BytesIO

def format_datetime_columns(df):
    """Format the known date/time columns as strings in the layouts the BG files expect."""
    # Dates are rendered as YYYY-MM-DD; unparseable values become NaN via errors='coerce'.
    if 'Fragtbrevsdato' in df.columns:
        df['Fragtbrevsdato'] = pd.to_datetime(df['Fragtbrevsdato'], errors='coerce').dt.strftime('%Y-%m-%d')
    if 'Fakturadato' in df.columns:
        df['Fakturadato'] = pd.to_datetime(df['Fakturadato'], errors='coerce').dt.strftime('%Y-%m-%d')
    # Delivery timestamps keep the time component, in DD-MM-YYYY HH:MM:SS format.
    if 'Leveringstidspunkt' in df.columns:
        df['Leveringstidspunkt'] = pd.to_datetime(df['Leveringstidspunkt'], errors='coerce').dt.strftime('%d-%m-%Y %H:%M:%S')
    return df

def split_excel(file, columns, original_file_name, sheet_name="Linjer", header_row=3):
    """Split the given worksheet into one Excel file per unique value found in the given columns.

    Rows 1-2 are treated as metadata and row 3 as the column headers; both are copied
    into every generated file so each split keeps the original layout.
    """
    try:
        # Load the entire sheet without headers to capture the metadata rows verbatim
        full_df = pd.read_excel(file, sheet_name=sheet_name, header=None)
        metadata_rows = full_df.iloc[:2].values.tolist()          # Rows 1 and 2 as lists
        column_headers = full_df.iloc[header_row - 1].tolist()    # Row 3 as column headers
        # Rewind the uploaded file before reading it a second time with proper headers
        file.seek(0)
        df = pd.read_excel(file, sheet_name=sheet_name, header=header_row - 1)
        log_messages = [f"Loaded worksheet '{sheet_name}'. Other worksheets were ignored."]
    except ValueError:
        return {}, [f"Worksheet '{sheet_name}' not found. Please upload a file with the correct structure."]
    except Exception as e:
        return {}, [f"An error occurred while reading the file: {e}"]

    # Format datetime columns
    df = format_datetime_columns(df)

    split_files = {}
    all_unique_values = set()

    # Validate that every requested column exists in the dataframe
    for col in columns:
        if col not in df.columns:
            log_messages.append(f"Column '{col}' not found in the worksheet. Please check the file.")
            return {}, log_messages

    # Collect all unique values across both columns
    for col in columns:
        unique_values = df[col].dropna().unique()
        log_messages.append(f"Found unique values in column '{col}': {list(unique_values)}")
        all_unique_values.update(unique_values)
    log_messages.append(f"Total unique values across columns: {list(all_unique_values)}")

    # Create one file per unique value; the filter assumes exactly two columns (sender and receiver)
    for value in all_unique_values:
        split_df = df[(df[columns[0]] == value) | (df[columns[1]] == value)]
        if split_df.empty:
            log_messages.append(f"No rows found for value '{value}'. Skipping file creation.")
            continue

        # Insert three empty rows and populate them with metadata and headers
        empty_rows = pd.DataFrame([[''] * len(column_headers)] * 3, columns=column_headers)
        empty_rows.iloc[0] = metadata_rows[0]   # Row 1: metadata
        empty_rows.iloc[1] = metadata_rows[1]   # Row 2: metadata
        empty_rows.iloc[2] = column_headers     # Row 3: column headers

        # Combine the metadata/header rows and the split data
        split_with_metadata = pd.concat([empty_rows, split_df], ignore_index=True)

        # Build a filesystem-safe file name from the value
        sanitized_value = str(value).replace("/", "-").replace("\\", "-")
        file_name = f"{original_file_name} - {sanitized_value}.xlsx"

        output = BytesIO()
        with pd.ExcelWriter(output, engine='xlsxwriter') as writer:
            split_with_metadata.to_excel(writer, index=False, header=False, sheet_name='Sheet1')
        output.seek(0)

        split_files[file_name] = output
        log_messages.append(f"File '{file_name}' created with {len(split_df)} rows (plus metadata and headers).")

    return split_files, log_messages
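
# Quick local check (sketch): split_excel can be exercised without the Streamlit UI.
# "BG_export.xlsx" is a hypothetical file name; it is assumed to contain a "Linjer"
# sheet with the two metadata rows, headers in row 3, and the Afsenderkunde /
# Modtagerkunde columns. Kept commented out so it never runs inside the Streamlit app.
#
# if __name__ == "__main__":
#     with open("BG_export.xlsx", "rb") as f:
#         files, logs = split_excel(f, ['Afsenderkunde', 'Modtagerkunde'], "BG_export")
#     for msg in logs:
#         print(msg)
#     print(f"Created {len(files)} split file(s): {list(files)}")
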
st.title("BG File Splitter")
st.write("Upload an Excel file to split it based on unique values in specified columns.")

uploaded_file = st.file_uploader("Upload Excel File", type=["xlsx"])

if uploaded_file:
    st.write("Processing the uploaded file...")
    original_file_name = uploaded_file.name.rsplit(".", 1)[0]
    columns_to_split = ['Afsenderkunde', 'Modtagerkunde']  # Column names to check for unique values
    split_files, logs = split_excel(uploaded_file, columns_to_split, original_file_name)

    st.write("### Logs")
    if logs:
        for log in logs:
            st.write(log)
    else:
        st.write("No logs available. Something might have gone wrong.")

    if split_files:
        st.write("### Download Split Files")
        for file_name, file_content in split_files.items():
            st.download_button(
                label=f"Download {file_name}",
                data=file_content,
                file_name=file_name,
                mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
            )
    else:
        st.write("No files were created. Check the logs for details.")