Spaces:

Esben922
/

GB-GB

Running

App Files Files Community

GB-GB / BG.py

Esben922

Update BG.py

d14c8c2 verified about 2 months ago

raw

history blame contribute delete

4.65 kB

	import streamlit as st
	import pandas as pd
	from io import BytesIO

	def format_datetime_columns(df):
	# Format specific columns as required
	if 'Fragtbrevsdato' in df.columns:
	df['Fragtbrevsdato'] = pd.to_datetime(df['Fragtbrevsdato'], errors='coerce').dt.strftime('%Y-%m-%d')
	if 'Fakturadato' in df.columns:
	df['Fakturadato'] = pd.to_datetime(df['Fakturadato'], errors='coerce').dt.strftime('%Y-%m-%d')
	if 'Leveringstidspunkt' in df.columns:
	df['Leveringstidspunkt'] = pd.to_datetime(df['Leveringstidspunkt'], errors='coerce').dt.strftime('%d-%m-%Y %H:%M:%S')
	return df

	def split_excel(file, columns, original_file_name, sheet_name="Linjer", header_row=3):
	try:
	# Load the entire sheet
	full_df = pd.read_excel(file, sheet_name=sheet_name, header=None)
	metadata_rows = full_df.iloc[:2].values.tolist() # Rows 1 and 2 as lists
	column_headers = full_df.iloc[header_row-1].tolist() # Row 3 as column headers
	df = pd.read_excel(file, sheet_name=sheet_name, header=header_row-1)
	log_messages = [f"Loaded worksheet '{sheet_name}'. Other worksheets were ignored."]
	except ValueError:
	return {}, [f"Worksheet '{sheet_name}' not found. Please upload a file with the correct structure."]
	except Exception as e:
	return {}, [f"An error occurred while reading the file: {e}"]

	# Format datetime columns
	df = format_datetime_columns(df)

	split_files = {}
	all_unique_values = set()

	# Validate that both columns exist in the dataframe
	for col in columns:
	if col not in df.columns:
	log_messages.append(f"Column '{col}' not found in the worksheet. Please check the file.")
	return {}, log_messages

	# Collect all unique values across both columns
	for col in columns:
	unique_values = df[col].dropna().unique()
	log_messages.append(f"Found unique values in column '{col}': {list(unique_values)}")
	all_unique_values.update(unique_values)

	log_messages.append(f"Total unique values across columns: {list(all_unique_values)}")

	# Create files for each unique value
	for value in all_unique_values:
	split_df = df[(df[columns[0]] == value) \| (df[columns[1]] == value)]
	if split_df.empty:
	log_messages.append(f"No rows found for value '{value}'. Skipping file creation.")
	continue

	# Insert three empty rows and populate them with metadata and headers
	empty_rows = pd.DataFrame([[''] * len(column_headers)] * 3, columns=column_headers)
	empty_rows.iloc[0] = metadata_rows[0] # Populate row 1 with metadata
	empty_rows.iloc[1] = metadata_rows[1] # Populate row 2 with metadata
	empty_rows.iloc[2] = column_headers # Populate row 3 with headers

	# Combine empty rows and the split data
	split_with_metadata = pd.concat([empty_rows, split_df], ignore_index=True)

	sanitized_value = str(value).replace("/", "-").replace("\\", "-")
	file_name = f"{original_file_name} - {sanitized_value}.xlsx"

	output = BytesIO()
	with pd.ExcelWriter(output, engine='xlsxwriter') as writer:
	split_with_metadata.to_excel(writer, index=False, header=False, sheet_name='Sheet1')
	output.seek(0)
	split_files[file_name] = output
	log_messages.append(f"File '{file_name}' created with {len(split_df)} rows (plus metadata and headers).")

	return split_files, log_messages

	st.title("BG File Splitter")
	st.write("Upload an Excel file to split it based on unique values in specified columns.")

	uploaded_file = st.file_uploader("Upload Excel File", type=["xlsx"])

	if uploaded_file:
	st.write("Processing the uploaded file...")
	original_file_name = uploaded_file.name.rsplit(".", 1)[0]
	columns_to_split = ['Afsenderkunde', 'Modtagerkunde'] # Column names to check for unique values

	split_files, logs = split_excel(uploaded_file, columns_to_split, original_file_name)

	st.write("### Logs")
	if logs:
	for log in logs:
	st.write(log)
	else:
	st.write("No logs available. Something might have gone wrong.")

	if split_files:
	st.write("### Download Split Files")
	for file_name, file_content in split_files.items():
	st.download_button(
	label=f"Download {file_name}",
	data=file_content,
	file_name=file_name,
	mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
	)
	else:
	st.write("No files were created. Check the logs for details.")