from typing import List, Tuple

from langchain_core.documents import Document


def combine_documents_without_losing_pagination(
    documents: list[Document],
) -> Tuple[List[Tuple[int, int, int]], str]:
    """Concatenate document texts while recording each document's span.

    The ``page_content`` of every document is joined, in input order and
    with no separator, into one string. For each document a
    ``(start_idx, end_idx, page_number)`` tuple is recorded, where
    ``start_idx``/``end_idx`` are character offsets into the combined string
    (``end_idx`` is exclusive, so ``combined[start_idx:end_idx]`` is that
    document's text).

    Args:
        documents: Documents to combine, in the order they should appear.

    Returns:
        A ``(page_boundaries, combined_text)`` tuple. Note the order: the
        boundaries list comes first, the combined text second. Both are
        empty (``[]`` and ``""``) when ``documents`` is empty.
    """
    page_boundaries: List[Tuple[int, int, int]] = []  # (start_idx, end_idx, page_number)
    text_parts: List[str] = []
    current_position = 0

    for document in documents:
        content = document.page_content
        start = current_position
        end = start + len(content)
        # Use the "page" metadata value as-is when present; otherwise fall
        # back to the document's 1-based position in the input.
        page_number = document.metadata.get("page", len(page_boundaries) + 1)
        page_boundaries.append((start, end, page_number))
        text_parts.append(content)
        current_position = end

    # Join once at the end instead of growing a string with += per document.
    return page_boundaries, "".join(text_parts)