import time
import uuid
from typing import Tuple

import gradio as gr
import supervision as sv
from tqdm import tqdm

# Frame window handed to the frame generator in process() as its
# ``start`` / ``end`` arguments.
START_FRAME, END_FRAME = 0, 10
# Size of that window; process() uses it as the progress-bar total.
TOTAL = END_FRAME - START_FRAME


def process(
    source_video: str,
    prompt: str,
    confidence: float,
    progress=gr.Progress(track_tqdm=True)
) -> Tuple[str, str]:
    """Copy the configured frame window of a video into a new MP4 file.

    Frames between ``START_FRAME`` and ``END_FRAME`` are read from
    ``source_video`` and written unchanged to ``<uuid4>.mp4`` (a path relative
    to the current working directory), with a tqdm progress bar labelled
    "Masking frames".

    Args:
        source_video: Path to the input video file.
        prompt: Currently unused by this function.
        confidence: Currently unused by this function.
        progress: Gradio progress tracker; created with ``track_tqdm=True``
            and not referenced directly in the body.

    Returns:
        The output file path, twice (one entry per output component wired to
        this callback).

    Raises:
        gr.Error: If ``source_video`` is empty or ``None``.
    """
    if not source_video:
        # Fail with a readable message instead of an opaque error from the
        # video-opening code when nothing was uploaded.
        raise gr.Error("Please upload a source video before submitting.")

    output_path = f"{uuid.uuid4()}.mp4"
    video_info = sv.VideoInfo.from_video_path(source_video)
    frames = sv.get_video_frames_generator(
        source_path=source_video, start=START_FRAME, end=END_FRAME)

    with sv.VideoSink(output_path, video_info=video_info) as sink:
        # Iterate the generator itself rather than calling next() a fixed
        # number of times: a clip shorter than the window simply ends early
        # instead of raising StopIteration out of the callback.
        for frame in tqdm(frames, total=TOTAL, desc="Masking frames"):
            sink.write_frame(frame)
            # NOTE(review): presumably a stand-in for real per-frame work so
            # the progress bar is visible -- confirm before removing.
            time.sleep(0.1)

    return output_path, output_path


# Build the demo UI: a single row with an input column and an output column.
with gr.Blocks() as demo:
    with gr.Row():
        # Input column: the source video, a text prompt, a confidence slider
        # and the button that triggers processing.
        with gr.Column():
            source_video_player = gr.Video(
                label="Source video", source="upload", format="mp4")
            prompt_text = gr.Textbox(
                label="Prompt", value="person")
            confidence_slider = gr.Slider(
                label="Confidence", minimum=0.5, maximum=1.0, step=0.05, value=0.6)
            submit_button = gr.Button("Submit")
        # Output column: two video players that receive the two values
        # returned by process().
        with gr.Column():
            masked_video_player = gr.Video(label="Masked video")
            painted_video_player = gr.Video(label="Painted video")

    # The inputs list matches process()'s first three parameters in order
    # (source_video, prompt, confidence); the outputs list matches its
    # two-element return value.
    submit_button.click(
        process,
        inputs=[source_video_player, prompt_text, confidence_slider],
        outputs=[masked_video_player, painted_video_player])

# Runs at import time (there is no __main__ guard).
# NOTE(review): queue() is presumably enabled so that gr.Progress updates from
# process() can be displayed -- confirm against the Gradio version in use.
demo.queue().launch(debug=False, show_error=True)