|
|
import gradio as gr |
|
|
import torch |
|
|
from diffusers import StableDiffusionPipeline, StableVideoDiffusionPipeline |
|
|
from diffusers.utils import export_to_video |
|
|
from PIL import Image |
|
|
|
|
|
|
|
|
# --- Model loading ---
# NOTE: do NOT combine .to("cuda") with enable_model_cpu_offload().
# Per the diffusers documentation, CPU offload manages device placement
# itself (moving submodules to GPU on demand); calling .to("cuda") first
# defeats offloading and raises an error on recent diffusers versions.

# Text-to-image: Stable Diffusion v1.5 in fp16.
text2img_pipe = StableDiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16
)
text2img_pipe.enable_model_cpu_offload()

# Image-to-video: Stable Video Diffusion (img2vid-xt) in fp16.
video_pipe = StableVideoDiffusionPipeline.from_pretrained(
    "stabilityai/stable-video-diffusion-img2vid-xt",
    torch_dtype=torch.float16,
    variant="fp16",
)
video_pipe.enable_model_cpu_offload()
|
|
|
|
|
def generate_video(prompt, aspect_ratio):
    """Generate a short video from a text prompt.

    Pipeline: text -> image (Stable Diffusion) -> video (Stable Video
    Diffusion), exported as an MP4 file.

    Args:
        prompt: Text description of the scene to generate.
        aspect_ratio: "16:9" for landscape (1024x576); any other value
            gives portrait (576x1024). NOTE(review): SVD-xt was trained
            at 1024x576, so portrait output may be lower quality —
            confirm acceptable for the 9:16 option.

    Returns:
        Path to the exported MP4 file (a unique temporary file).
    """
    import tempfile

    # Text -> image: take the first generated image.
    image = text2img_pipe(prompt).images[0]

    # Resize to a resolution SVD accepts for the chosen orientation.
    if aspect_ratio == "16:9":
        image = image.resize((1024, 576))
    else:
        image = image.resize((576, 1024))

    # Fixed seed keeps the video stage reproducible for a given image.
    generator = torch.manual_seed(42)
    frames = video_pipe(image, decode_chunk_size=8, generator=generator).frames[0]

    # Write to a unique temp file instead of a shared "output.mp4" so
    # concurrent Gradio requests don't overwrite each other's results.
    out_path = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False).name
    export_to_video(frames, out_path, fps=7)
    return out_path
|
|
|
|
|
# Build and launch the Gradio UI: prompt + aspect ratio in, video file out.
gr.Interface(
    fn=generate_video,
    inputs=[
        gr.Textbox(label="Scene Prompt", placeholder="e.g. A 3D Islamic city at sunset, 4K"),
        gr.Radio(["16:9", "9:16"], label="Aspect Ratio", value="16:9"),
    ],
    outputs=gr.Video(label="Generated Video"),
    # "→" restored: the original title contained "β", mojibake of a
    # mis-encoded arrow character.
    title="Text → Image → Video Generator",
    description="Enter a scene prompt and get a cinematic video. Powered by Stable Diffusion + Stable Video Diffusion.",
).launch()