Spaces:

zjrwtxtechstudio
/

video-to-pdf

Running

App Files Files Community

zjrwtxtechstudio commited on Nov 26, 2024

Commit

6888b5c

verified ·

1 Parent(s): 7795add

Upload 4 files

Browse files

Files changed (4) hide show

app.py +329 -0
app_gradio.py +329 -0
readme.md +25 -0
requirements.txt +5 -0

app.py ADDED Viewed

	@@ -0,0 +1,329 @@

+import os
+import time
+import cv2
+import imutils
+import shutil
+import img2pdf
+import glob
+from skimage.metrics import structural_similarity
+import gradio as gr
+import tempfile
+############# Define constants
+OUTPUT_SLIDES_DIR = f"./output"
+FRAME_RATE = 3                   # no.of frames per second that needs to be processed, fewer the count faster the speed
+WARMUP = FRAME_RATE              # initial number of frames to be skipped
+FGBG_HISTORY = FRAME_RATE * 15   # no.of frames in background object
+VAR_THRESHOLD = 16               # Threshold on the squared Mahalanobis distance between the pixel and the model to decide whether a pixel is well described by the background model.
+DETECT_SHADOWS = False            # If true, the algorithm will detect shadows and mark them.
+MIN_PERCENT = 0.1                # min % of diff between foreground and background to detect if motion has stopped
+MAX_PERCENT = 3                  # max % of diff between foreground and background to detect if frame is still in motion
+SSIM_THRESHOLD = 0.9             # SSIM threshold of two consecutive frame
+def get_frames(video_path):
+    '''A fucntion to return the frames from a video located at video_path
+    this function skips frames as defined in FRAME_RATE'''
+    # open a pointer to the video file initialize the width and height of the frame
+    vs = cv2.VideoCapture(video_path)
+    if not vs.isOpened():
+        raise Exception(f'unable to open file {video_path}')
+    total_frames = vs.get(cv2.CAP_PROP_FRAME_COUNT)
+    frame_time = 0
+    frame_count = 0
+    # loop over the frames of the video
+    while True:
+        vs.set(cv2.CAP_PROP_POS_MSEC, frame_time * 1000)    # move frame to a timestamp
+        frame_time += 1/FRAME_RATE
+        (_, frame) = vs.read()
+        # if the frame is None, then we have reached the end of the video file
+        if frame is None:
+            break
+        frame_count += 1
+        yield frame_count, frame_time, frame
+    vs.release()
+def detect_unique_screenshots(video_path, output_folder_screenshot_path, progress=gr.Progress()):
+    '''Extract unique screenshots from video'''
+    fgbg = cv2.createBackgroundSubtractorMOG2(history=FGBG_HISTORY, varThreshold=VAR_THRESHOLD,detectShadows=DETECT_SHADOWS)
+    captured = False
+    start_time = time.time()
+    (W, H) = (None, None)
+    # Get total frames for progress calculation
+    cap = cv2.VideoCapture(video_path)
+    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+    cap.release()
+    screenshoots_count = 0
+    last_screenshot = None
+    saved_files = []
+    progress(0, desc="初始化视频处理...")
+    for frame_count, frame_time, frame in get_frames(video_path):
+        # Update progress
+        progress((frame_count / total_frames) * 0.7, desc=f"处理视频帧 {frame_count}/{total_frames}")
+        orig = frame.copy()
+        frame = imutils.resize(frame, width=600)
+        mask = fgbg.apply(frame)
+        if W is None or H is None:
+            (H, W) = mask.shape[:2]
+        p_diff = (cv2.countNonZero(mask) / float(W * H)) * 100
+        if p_diff < MIN_PERCENT and not captured and frame_count > WARMUP:
+            captured = True
+            filename = f"{screenshoots_count:03}_{round(frame_time/60, 2)}.png"
+            path = os.path.join(output_folder_screenshot_path, filename)
+            image_ssim = 0.0
+            if last_screenshot is not None:
+                image_ssim = structural_similarity(last_screenshot, orig, channel_axis=2, data_range=255)
+            if image_ssim < SSIM_THRESHOLD:
+                try:
+                    progress(0.7 + (screenshoots_count * 0.1), desc=f"保存截图 {screenshoots_count + 1}")
+                    print("saving {}".format(path))
+                    cv2.imwrite(str(path), orig)
+                    last_screenshot = orig
+                    saved_files.append(path)
+                    screenshoots_count += 1
+                except Exception as e:
+                    print(f"Error saving image: {str(e)}")
+                    continue
+        elif captured and p_diff >= MAX_PERCENT:
+            captured = False
+    progress(0.8, desc="截图提取完成")
+    print(f'{screenshoots_count} screenshots Captured!')
+    print(f'Time taken {time.time()-start_time}s')
+    return saved_files
+def initialize_output_folder(video_path):
+    '''Clean the output folder if already exists'''
+    # Create a safe folder name from video filename
+    video_filename = os.path.splitext(os.path.basename(video_path))[0]
+    # Replace potentially problematic characters
+    safe_filename = "".join(x for x in video_filename if x.isalnum() or x in (' ', '-', '_'))
+    output_folder_screenshot_path = os.path.join(OUTPUT_SLIDES_DIR, safe_filename)
+    if os.path.exists(output_folder_screenshot_path):
+        shutil.rmtree(output_folder_screenshot_path)
+    os.makedirs(output_folder_screenshot_path, exist_ok=True)
+    print('initialized output folder', output_folder_screenshot_path)
+    return output_folder_screenshot_path
+def convert_screenshots_to_pdf(video_path, output_folder_screenshot_path):
+    # Create a safe filename
+    video_filename = os.path.splitext(os.path.basename(video_path))[0]
+    safe_filename = "".join(x for x in video_filename if x.isalnum() or x in (' ', '-', '_'))
+    output_pdf_path = os.path.join(OUTPUT_SLIDES_DIR, f"{safe_filename}.pdf")
+    try:
+        print('output_folder_screenshot_path', output_folder_screenshot_path)
+        print('output_pdf_path', output_pdf_path)
+        print('converting images to pdf..')
+        # Get all PNG files and ensure they exist
+        png_files = sorted(glob.glob(os.path.join(output_folder_screenshot_path, "*.png")))
+        if not png_files:
+            raise Exception("No PNG files found to convert to PDF")
+        with open(output_pdf_path, "wb") as f:
+            f.write(img2pdf.convert(png_files))
+        print('Pdf Created!')
+        print('pdf saved at', output_pdf_path)
+        return output_pdf_path
+    except Exception as e:
+        print(f"Error creating PDF: {str(e)}")
+        raise
+def video_to_slides(video_path, progress=gr.Progress()):
+    progress(0.1, desc="准备处理视频...")
+    output_folder_screenshot_path = initialize_output_folder(video_path)
+    saved_files = detect_unique_screenshots(video_path, output_folder_screenshot_path, progress)
+    return output_folder_screenshot_path, saved_files
+def slides_to_pdf(video_path, output_folder_screenshot_path, saved_files, progress=gr.Progress()):
+    video_filename = os.path.splitext(os.path.basename(video_path))[0]
+    safe_filename = "".join(x for x in video_filename if x.isalnum() or x in (' ', '-', '_'))
+    output_pdf_path = os.path.join(OUTPUT_SLIDES_DIR, f"{safe_filename}.pdf")
+    try:
+        progress(0.9, desc="正在生成PDF...")
+        print('output_folder_screenshot_path', output_folder_screenshot_path)
+        print('output_pdf_path', output_pdf_path)
+        if not saved_files:
+            raise Exception("未从视频中捕获到截图")
+        existing_files = [f for f in saved_files if os.path.exists(f)]
+        if not existing_files:
+            raise Exception("未找到保存的截图文件")
+        with open(output_pdf_path, "wb") as f:
+            f.write(img2pdf.convert(existing_files))
+        progress(1.0, desc="处理完成！")
+        print('PDF创建成功！')
+        print('PDF保存位置:', output_pdf_path)
+        return output_pdf_path
+    except Exception as e:
+        print(f"创建PDF时出错: {str(e)}")
+        raise
+def run_app(video_path, progress=gr.Progress()):
+    try:
+        if not video_path:
+            raise gr.Error("请选择要处理的视频文件")
+        progress(0, desc="开始处理...")
+        output_folder_screenshot_path, saved_files = video_to_slides(video_path, progress)
+        return slides_to_pdf(video_path, output_folder_screenshot_path, saved_files, progress)
+    except Exception as e:
+        raise gr.Error(f"处理失败: {str(e)}")
+def process_video_file(video_file):
+    """Handle uploaded video file and return PDF"""
+    try:
+        # If video_file is a string (path), use it directly
+        if isinstance(video_file, str):
+            if video_file.strip() == "":
+                return None
+            return run_app(video_file)
+        # If it's an uploaded file, create a temporary file
+        if video_file is not None:
+            # Generate a unique filename for the temporary video
+            temp_filename = f"temp_video_{int(time.time())}.mp4"
+            temp_path = os.path.join(tempfile.gettempdir(), temp_filename)
+            try:
+                if hasattr(video_file, 'name'):  # If it's already a file path
+                    shutil.copyfile(video_file, temp_path)
+                else:  # If it's file content
+                    with open(temp_path, 'wb') as f:
+                        f.write(video_file)
+                # Process the video
+                output_folder_screenshot_path, saved_files = video_to_slides(temp_path)
+                pdf_path = slides_to_pdf(temp_path, output_folder_screenshot_path, saved_files)
+                # Cleanup
+                if os.path.exists(temp_path):
+                    os.unlink(temp_path)
+                return pdf_path
+            except Exception as e:
+                if os.path.exists(temp_path):
+                    os.unlink(temp_path)
+                raise gr.Error(f"处理视频时出错: {str(e)}")
+        return None
+    except Exception as e:
+        raise gr.Error(f"处理视频时出错: {str(e)}")
+# Create a modern interface with custom CSS
+css = """
+.gradio-container {
+    font-family: 'SF Pro Display', -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, 'Open Sans', 'Helvetica Neue', sans-serif;
+}
+.container {
+    max-width: 900px;
+    margin: auto;
+    padding: 20px;
+}
+.gr-button {
+    background: linear-gradient(90deg, #2563eb, #3b82f6);
+    border: none;
+    color: white;
+}
+.gr-button:hover {
+    background: linear-gradient(90deg, #1d4ed8, #2563eb);
+    transform: translateY(-1px);
+    box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1), 0 2px 4px -1px rgba(0, 0, 0, 0.06);
+}
+.status-info {
+    margin-top: 10px;
+    padding: 10px;
+    border-radius: 4px;
+    background-color: #f3f4f6;
+}
+"""
+with gr.Blocks(css=css) as iface:
+    gr.Markdown(
+        """
+        # 🎥 视频转PDF智能助手
+        ### 轻松将视频转换为高质量PDF文档
+        公众号：正经人王同学 | 全网同名
+        """
+    )
+    with gr.Row():
+        with gr.Column():
+            video_input = gr.Video(label="上传视频")
+            video_path = gr.Textbox(label="或输入视频路径", placeholder="例如: ./input/video.mp4")
+            convert_btn = gr.Button("开始转换", variant="primary")
+    with gr.Row():
+        output_file = gr.File(label="下载PDF")
+    with gr.Row():
+        status = gr.Markdown(value="", elem_classes=["status-info"])
+    gr.Markdown(
+        """
+        ### 使用说明
+        1. 上传视频文件 或 输入视频文件路径
+        2. 点击"开始转换"按钮
+        3. 等待处理完成后下载生成的PDF文件
+        ### 特点
+        - 智能检测视频关键帧
+        - 高质量PDF输出
+        - 支持多种视频格式
+        """
+    )
+    def process_video(video, path):
+        if video:
+            return run_app(video)
+        elif path:
+            return run_app(path)
+        else:
+            raise gr.Error("请上传视频或输入视频路径")
+    convert_btn.click(
+        fn=process_video,
+        inputs=[video_input, video_path],
+        outputs=[output_file],
+    )
+if __name__ == "__main__":
+    iface.launch()

app_gradio.py ADDED Viewed

	@@ -0,0 +1,329 @@

+import os
+import time
+import cv2
+import imutils
+import shutil
+import img2pdf
+import glob
+from skimage.metrics import structural_similarity
+import gradio as gr
+import tempfile
+############# Define constants
+OUTPUT_SLIDES_DIR = f"./output"
+FRAME_RATE = 3                   # no.of frames per second that needs to be processed, fewer the count faster the speed
+WARMUP = FRAME_RATE              # initial number of frames to be skipped
+FGBG_HISTORY = FRAME_RATE * 15   # no.of frames in background object
+VAR_THRESHOLD = 16               # Threshold on the squared Mahalanobis distance between the pixel and the model to decide whether a pixel is well described by the background model.
+DETECT_SHADOWS = False            # If true, the algorithm will detect shadows and mark them.
+MIN_PERCENT = 0.1                # min % of diff between foreground and background to detect if motion has stopped
+MAX_PERCENT = 3                  # max % of diff between foreground and background to detect if frame is still in motion
+SSIM_THRESHOLD = 0.9             # SSIM threshold of two consecutive frame
+def get_frames(video_path):
+    '''A fucntion to return the frames from a video located at video_path
+    this function skips frames as defined in FRAME_RATE'''
+    # open a pointer to the video file initialize the width and height of the frame
+    vs = cv2.VideoCapture(video_path)
+    if not vs.isOpened():
+        raise Exception(f'unable to open file {video_path}')
+    total_frames = vs.get(cv2.CAP_PROP_FRAME_COUNT)
+    frame_time = 0
+    frame_count = 0
+    # loop over the frames of the video
+    while True:
+        vs.set(cv2.CAP_PROP_POS_MSEC, frame_time * 1000)    # move frame to a timestamp
+        frame_time += 1/FRAME_RATE
+        (_, frame) = vs.read()
+        # if the frame is None, then we have reached the end of the video file
+        if frame is None:
+            break
+        frame_count += 1
+        yield frame_count, frame_time, frame
+    vs.release()
+def detect_unique_screenshots(video_path, output_folder_screenshot_path, progress=gr.Progress()):
+    '''Extract unique screenshots from video'''
+    fgbg = cv2.createBackgroundSubtractorMOG2(history=FGBG_HISTORY, varThreshold=VAR_THRESHOLD,detectShadows=DETECT_SHADOWS)
+    captured = False
+    start_time = time.time()
+    (W, H) = (None, None)
+    # Get total frames for progress calculation
+    cap = cv2.VideoCapture(video_path)
+    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+    cap.release()
+    screenshoots_count = 0
+    last_screenshot = None
+    saved_files = []
+    progress(0, desc="初始化视频处理...")
+    for frame_count, frame_time, frame in get_frames(video_path):
+        # Update progress
+        progress((frame_count / total_frames) * 0.7, desc=f"处理视频帧 {frame_count}/{total_frames}")
+        orig = frame.copy()
+        frame = imutils.resize(frame, width=600)
+        mask = fgbg.apply(frame)
+        if W is None or H is None:
+            (H, W) = mask.shape[:2]
+        p_diff = (cv2.countNonZero(mask) / float(W * H)) * 100
+        if p_diff < MIN_PERCENT and not captured and frame_count > WARMUP:
+            captured = True
+            filename = f"{screenshoots_count:03}_{round(frame_time/60, 2)}.png"
+            path = os.path.join(output_folder_screenshot_path, filename)
+            image_ssim = 0.0
+            if last_screenshot is not None:
+                image_ssim = structural_similarity(last_screenshot, orig, channel_axis=2, data_range=255)
+            if image_ssim < SSIM_THRESHOLD:
+                try:
+                    progress(0.7 + (screenshoots_count * 0.1), desc=f"保存截图 {screenshoots_count + 1}")
+                    print("saving {}".format(path))
+                    cv2.imwrite(str(path), orig)
+                    last_screenshot = orig
+                    saved_files.append(path)
+                    screenshoots_count += 1
+                except Exception as e:
+                    print(f"Error saving image: {str(e)}")
+                    continue
+        elif captured and p_diff >= MAX_PERCENT:
+            captured = False
+    progress(0.8, desc="截图提取完成")
+    print(f'{screenshoots_count} screenshots Captured!')
+    print(f'Time taken {time.time()-start_time}s')
+    return saved_files
+def initialize_output_folder(video_path):
+    '''Clean the output folder if already exists'''
+    # Create a safe folder name from video filename
+    video_filename = os.path.splitext(os.path.basename(video_path))[0]
+    # Replace potentially problematic characters
+    safe_filename = "".join(x for x in video_filename if x.isalnum() or x in (' ', '-', '_'))
+    output_folder_screenshot_path = os.path.join(OUTPUT_SLIDES_DIR, safe_filename)
+    if os.path.exists(output_folder_screenshot_path):
+        shutil.rmtree(output_folder_screenshot_path)
+    os.makedirs(output_folder_screenshot_path, exist_ok=True)
+    print('initialized output folder', output_folder_screenshot_path)
+    return output_folder_screenshot_path
+def convert_screenshots_to_pdf(video_path, output_folder_screenshot_path):
+    # Create a safe filename
+    video_filename = os.path.splitext(os.path.basename(video_path))[0]
+    safe_filename = "".join(x for x in video_filename if x.isalnum() or x in (' ', '-', '_'))
+    output_pdf_path = os.path.join(OUTPUT_SLIDES_DIR, f"{safe_filename}.pdf")
+    try:
+        print('output_folder_screenshot_path', output_folder_screenshot_path)
+        print('output_pdf_path', output_pdf_path)
+        print('converting images to pdf..')
+        # Get all PNG files and ensure they exist
+        png_files = sorted(glob.glob(os.path.join(output_folder_screenshot_path, "*.png")))
+        if not png_files:
+            raise Exception("No PNG files found to convert to PDF")
+        with open(output_pdf_path, "wb") as f:
+            f.write(img2pdf.convert(png_files))
+        print('Pdf Created!')
+        print('pdf saved at', output_pdf_path)
+        return output_pdf_path
+    except Exception as e:
+        print(f"Error creating PDF: {str(e)}")
+        raise
+def video_to_slides(video_path, progress=gr.Progress()):
+    progress(0.1, desc="准备处理视频...")
+    output_folder_screenshot_path = initialize_output_folder(video_path)
+    saved_files = detect_unique_screenshots(video_path, output_folder_screenshot_path, progress)
+    return output_folder_screenshot_path, saved_files
+def slides_to_pdf(video_path, output_folder_screenshot_path, saved_files, progress=gr.Progress()):
+    video_filename = os.path.splitext(os.path.basename(video_path))[0]
+    safe_filename = "".join(x for x in video_filename if x.isalnum() or x in (' ', '-', '_'))
+    output_pdf_path = os.path.join(OUTPUT_SLIDES_DIR, f"{safe_filename}.pdf")
+    try:
+        progress(0.9, desc="正在生成PDF...")
+        print('output_folder_screenshot_path', output_folder_screenshot_path)
+        print('output_pdf_path', output_pdf_path)
+        if not saved_files:
+            raise Exception("未从视频中捕获到截图")
+        existing_files = [f for f in saved_files if os.path.exists(f)]
+        if not existing_files:
+            raise Exception("未找到保存的截图文件")
+        with open(output_pdf_path, "wb") as f:
+            f.write(img2pdf.convert(existing_files))
+        progress(1.0, desc="处理完成！")
+        print('PDF创建成功！')
+        print('PDF保存位置:', output_pdf_path)
+        return output_pdf_path
+    except Exception as e:
+        print(f"创建PDF时出错: {str(e)}")
+        raise
+def run_app(video_path, progress=gr.Progress()):
+    try:
+        if not video_path:
+            raise gr.Error("请选择要处理的视频文件")
+        progress(0, desc="开始处理...")
+        output_folder_screenshot_path, saved_files = video_to_slides(video_path, progress)
+        return slides_to_pdf(video_path, output_folder_screenshot_path, saved_files, progress)
+    except Exception as e:
+        raise gr.Error(f"处理失败: {str(e)}")
+def process_video_file(video_file):
+    """Handle uploaded video file and return PDF"""
+    try:
+        # If video_file is a string (path), use it directly
+        if isinstance(video_file, str):
+            if video_file.strip() == "":
+                return None
+            return run_app(video_file)
+        # If it's an uploaded file, create a temporary file
+        if video_file is not None:
+            # Generate a unique filename for the temporary video
+            temp_filename = f"temp_video_{int(time.time())}.mp4"
+            temp_path = os.path.join(tempfile.gettempdir(), temp_filename)
+            try:
+                if hasattr(video_file, 'name'):  # If it's already a file path
+                    shutil.copyfile(video_file, temp_path)
+                else:  # If it's file content
+                    with open(temp_path, 'wb') as f:
+                        f.write(video_file)
+                # Process the video
+                output_folder_screenshot_path, saved_files = video_to_slides(temp_path)
+                pdf_path = slides_to_pdf(temp_path, output_folder_screenshot_path, saved_files)
+                # Cleanup
+                if os.path.exists(temp_path):
+                    os.unlink(temp_path)
+                return pdf_path
+            except Exception as e:
+                if os.path.exists(temp_path):
+                    os.unlink(temp_path)
+                raise gr.Error(f"处理视频时出错: {str(e)}")
+        return None
+    except Exception as e:
+        raise gr.Error(f"处理视频时出错: {str(e)}")
+# Create a modern interface with custom CSS
+css = """
+.gradio-container {
+    font-family: 'SF Pro Display', -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, 'Open Sans', 'Helvetica Neue', sans-serif;
+}
+.container {
+    max-width: 900px;
+    margin: auto;
+    padding: 20px;
+}
+.gr-button {
+    background: linear-gradient(90deg, #2563eb, #3b82f6);
+    border: none;
+    color: white;
+}
+.gr-button:hover {
+    background: linear-gradient(90deg, #1d4ed8, #2563eb);
+    transform: translateY(-1px);
+    box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1), 0 2px 4px -1px rgba(0, 0, 0, 0.06);
+}
+.status-info {
+    margin-top: 10px;
+    padding: 10px;
+    border-radius: 4px;
+    background-color: #f3f4f6;
+}
+"""
+with gr.Blocks(css=css) as iface:
+    gr.Markdown(
+        """
+        # 🎥 视频转PDF智能助手
+        ### 轻松将视频转换为高质量PDF文档
+        公众号：正经人王同学 | 全网同名
+        """
+    )
+    with gr.Row():
+        with gr.Column():
+            video_input = gr.Video(label="上传视频")
+            video_path = gr.Textbox(label="或输入视频路径", placeholder="例如: ./input/video.mp4")
+            convert_btn = gr.Button("开始转换", variant="primary")
+    with gr.Row():
+        output_file = gr.File(label="下载PDF")
+    with gr.Row():
+        status = gr.Markdown(value="", elem_classes=["status-info"])
+    gr.Markdown(
+        """
+        ### 使用说明
+        1. 上传视频文件 或 输入视频文件路径
+        2. 点击"开始转换"按钮
+        3. 等待处理完成后下载生成的PDF文件
+        ### 特点
+        - 智能检测视频关键帧
+        - 高质量PDF输出
+        - 支持多种视频格式
+        """
+    )
+    def process_video(video, path):
+        if video:
+            return run_app(video)
+        elif path:
+            return run_app(path)
+        else:
+            raise gr.Error("请上传视频或输入视频路径")
+    convert_btn.click(
+        fn=process_video,
+        inputs=[video_input, video_path],
+        outputs=[output_file],
+    )
+if __name__ == "__main__":
+    iface.launch()

readme.md ADDED Viewed

	@@ -0,0 +1,25 @@

+# 视频转PDF智能助手
+这是一个基于 Gradio 的 Web 应用，可以将视频自动转换为 PDF 文档。应用会智能检测视频中的关键帧，并将其转换为高质量的 PDF 文件。
+## 功能特点
+- 智能检测视频关键帧
+- 自动生成高质量PDF
+- 支持多种视频格式
+- 简单易用的Web界面
+## 使用方法
+1. 上传视频文件或输入视频路径
+2. 点击"开始转换"按钮
+3. 等待处理完成后下载生成的PDF
+## 技术栈
+- Python
+- OpenCV
+- Gradio
+- scikit-image
+- img2pdf

requirements.txt ADDED Viewed

	@@ -0,0 +1,5 @@

+opencv-python==4.8.1.78
+imutils==0.5.4
+scikit-image==0.22.0
+gradio==4.8.0
+img2pdf==0.5.1