import gradio as gr
import torch
from diffusers import StableDiffusionPipeline, StableVideoDiffusionPipeline
from diffusers.utils import export_to_video
from PIL import Image
# Load models. enable_model_cpu_offload() manages device placement itself,
# so the pipelines must not also be moved to CUDA with .to("cuda") — doing
# both conflicts (recent diffusers versions warn or error on it).
text2img_pipe = StableDiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16
)
text2img_pipe.enable_model_cpu_offload()

video_pipe = StableVideoDiffusionPipeline.from_pretrained(
    "stabilityai/stable-video-diffusion-img2vid-xt", torch_dtype=torch.float16, variant="fp16"
)
video_pipe.enable_model_cpu_offload()
def generate_video(prompt, aspect_ratio):
    # Text -> image: SD 1.5 returns a 512x512 PIL image by default.
    image = text2img_pipe(prompt).images[0]
    # Image -> video: SVD-XT expects 1024x576 (or 576x1024 for portrait);
    # the square image is stretched to fit, which slightly distorts the
    # composition. LANCZOS resampling keeps the upscale reasonably sharp.
    if aspect_ratio == "16:9":
        image = image.resize((1024, 576), Image.LANCZOS)
    else:
        image = image.resize((576, 1024), Image.LANCZOS)
    # Fixed seed for reproducible motion; decode_chunk_size=8 limits VRAM
    # use while decoding the generated frames.
    generator = torch.manual_seed(42)
    frames = video_pipe(image, decode_chunk_size=8, generator=generator).frames[0]
    export_to_video(frames, "output.mp4", fps=7)
    return "output.mp4"
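
# Optional smoke test (hypothetical prompt; assumes a CUDA GPU with enough
# VRAM) — uncomment to try generation directly, without the web UI:
# print(generate_video("A lighthouse in a storm, cinematic", "16:9"))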
gr.Interface(
    fn=generate_video,
    inputs=[
        gr.Textbox(label="Scene Prompt", placeholder="e.g. A 3D Islamic city at sunset, 4K"),
        gr.Radio(["16:9", "9:16"], label="Aspect Ratio", value="16:9"),
    ],
    outputs=gr.Video(label="Generated Video"),
    title="Text → Image → Video Generator",
    description="Enter a scene prompt and get a cinematic video. Powered by Stable Diffusion + Stable Video Diffusion.",
).launch()
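
# Note: launch() serves the UI locally by default. Since each request runs a
# long GPU job, chaining the standard Gradio queue — .queue().launch() — is a
# common way to avoid request timeouts; launch(share=True) additionally
# creates a temporary public link.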