File size: 1,468 Bytes
72090d7
cb4edcb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
import tempfile

import gradio as gr
import torch
from diffusers import StableDiffusionPipeline, StableVideoDiffusionPipeline
from diffusers.utils import export_to_video
from PIL import Image

# Load models.
# NOTE: enable_model_cpu_offload() manages device placement itself — it moves
# each submodule to the GPU only while it runs and returns it to CPU after.
# Calling .to("cuda") first defeats the offload (the whole pipeline stays
# resident in VRAM) and recent diffusers versions raise an error for the
# combination, so the pipelines are deliberately NOT moved to CUDA here.
text2img_pipe = StableDiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16
)
text2img_pipe.enable_model_cpu_offload()

video_pipe = StableVideoDiffusionPipeline.from_pretrained(
    "stabilityai/stable-video-diffusion-img2vid-xt", torch_dtype=torch.float16, variant="fp16"
)
video_pipe.enable_model_cpu_offload()

def generate_video(prompt, aspect_ratio, seed=42):
    """Generate a short video from a text prompt.

    Pipeline: text -> still image (Stable Diffusion) -> video
    (Stable Video Diffusion).

    Args:
        prompt: Scene description passed to the text-to-image model.
        aspect_ratio: "16:9" for landscape; any other value yields
            9:16 portrait.
        seed: RNG seed for the video pass. The fixed default keeps output
            reproducible for a given prompt while letting callers vary it.

    Returns:
        Filesystem path to the generated MP4.
    """
    # Generate the conditioning frame for the video model.
    image = text2img_pipe(prompt).images[0]
    # SVD-XT expects 1024x576 (landscape) or 576x1024 (portrait) inputs.
    if aspect_ratio == "16:9":
        image = image.resize((1024, 576))
    else:
        image = image.resize((576, 1024))

    # Use a scoped Generator instead of torch.manual_seed() so the global
    # RNG state is left untouched for other requests.
    generator = torch.Generator().manual_seed(seed)
    frames = video_pipe(image, decode_chunk_size=8, generator=generator).frames[0]

    # Write to a unique temp file: a fixed "output.mp4" would be clobbered
    # by concurrent Gradio requests racing on the same path.
    out_path = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False).name
    export_to_video(frames, out_path, fps=7)
    return out_path

# Build the web UI and start serving.
# Fix: the original title contained mojibake ("β†’"), i.e. the UTF-8 bytes of
# "→" mis-decoded as Latin-1; the user-facing arrow is restored here.
demo = gr.Interface(
    fn=generate_video,
    inputs=[
        gr.Textbox(label="Scene Prompt", placeholder="e.g. A 3D Islamic city at sunset, 4K"),
        gr.Radio(["16:9", "9:16"], label="Aspect Ratio", value="16:9"),
    ],
    outputs=gr.Video(label="Generated Video"),
    title="Text → Image → Video Generator",
    description="Enter a scene prompt and get a cinematic video. Powered by Stable Diffusion + Stable Video Diffusion.",
)
demo.launch()