"""Gradio demo that copies the first frames of an uploaded video to a new file.

The UI collects a video, a text prompt and a confidence value; the callback
currently re-encodes frames START_FRAME..END_FRAME unchanged and shows the
result in both output players.
"""

import time
import uuid
from typing import Tuple

import gradio as gr
import supervision as sv
from tqdm import tqdm

# Frame window (start inclusive, end exclusive as passed to the generator).
START_FRAME = 0
END_FRAME = 10
TOTAL = END_FRAME - START_FRAME


def process(
    source_video: str,
    prompt: str,
    confidence: float,
    progress=gr.Progress(track_tqdm=True)
) -> Tuple[str, str]:
    """Write the selected frame window of ``source_video`` to a new MP4.

    Args:
        source_video: Path of the uploaded video file.
        prompt: Text prompt from the UI. Not read by this function yet.
        confidence: Confidence value from the UI. Not read by this function
            yet.
        progress: Gradio progress tracker; ``track_tqdm=True`` lets the tqdm
            loop below drive the progress bar in the UI.

    Returns:
        A pair of paths for the two output players. Both entries are the same
        freshly written ``<uuid4>.mp4`` file.

    Raises:
        gr.Error: If no video was provided.
    """
    if not source_video:
        # Submitting without an upload passes None; fail with a clear message
        # instead of an opaque error from the video reader.
        raise gr.Error("Please upload a source video first.")

    target_path = f"{uuid.uuid4()}.mp4"
    video_info = sv.VideoInfo.from_video_path(source_video)
    frames = sv.get_video_frames_generator(
        source_path=source_video, start=START_FRAME, end=END_FRAME)

    # Clips shorter than the window yield fewer than TOTAL frames; cap the
    # progress total so the bar can still complete. Fall back to TOTAL when
    # the frame count is unknown (None or 0).
    available = video_info.total_frames
    if available:
        expected = min(TOTAL, max(available - START_FRAME, 0))
    else:
        expected = TOTAL

    with sv.VideoSink(target_path, video_info=video_info) as sink:
        # Iterate the generator directly: calling next() a fixed TOTAL times
        # raised StopIteration on videos with fewer frames than the window.
        for frame in tqdm(frames, total=expected, desc="Masking frames"):
            sink.write_frame(frame)
            # NOTE(review): presumably a stand-in for per-frame processing
            # time - confirm before removing.
            time.sleep(0.1)

    return target_path, target_path


with gr.Blocks() as demo:
    with gr.Row():
        # Left column: inputs.
        with gr.Column():
            source_video_player = gr.Video(
                label="Source video", source="upload", format="mp4")
            prompt_text = gr.Textbox(
                label="Prompt", value="person")
            confidence_slider = gr.Slider(
                label="Confidence", minimum=0.5, maximum=1.0, step=0.05, value=0.6)
            submit_button = gr.Button("Submit")
        # Right column: outputs.
        with gr.Column():
            masked_video_player = gr.Video(label="Masked video")
            painted_video_player = gr.Video(label="Painted video")

    submit_button.click(
        process,
        inputs=[source_video_player, prompt_text, confidence_slider],
        outputs=[masked_video_player, painted_video_player])

# queue() is enabled before launching; show_error surfaces callback
# exceptions (including gr.Error) in the browser.
demo.queue().launch(debug=False, show_error=True)