Spaces:

Sabbirr12
/

Click

Runtime error

App Files Files Community

Sabbirr12 committed on Sep 22

Commit

cb4edcb

verified ·

1 Parent(s): 44dff12

Update app.py

Browse files

Files changed (1) hide show

app.py +40 -64

app.py CHANGED Viewed

@@ -1,65 +1,41 @@
-import torch, cv2, os, numpy as np
 import gradio as gr
-from diffusers import StableDiffusionPipeline
-from modelscope.pipelines import pipeline
-from modelscope.outputs import OutputKeys
-# Run on the GPU when torch reports CUDA is available, otherwise on the CPU.
-device = "cuda" if torch.cuda.is_available() else "cpu"
-# -------- Text -> Image --------
-def text_to_image(prompt, ratio="9:16"):
-    """Generate an image for `prompt`, save it as generated.png, and return that filename.
-
-    `ratio` selects the output size: "9:16" gives 576x1024 (width x height);
-    any other value gives 1024x576.
-    """
-    # NOTE(review): the pipeline is re-created on every call rather than
-    # being loaded once at module level.
-    # NOTE(review): float16 weights are requested even when `device` is
-    # "cpu" -- confirm half precision is usable there.
-    pipe = StableDiffusionPipeline.from_pretrained(
-        "stabilityai/stable-diffusion-2-1", torch_dtype=torch.float16
-    ).to(device)
-    width, height = (576, 1024) if ratio=="9:16" else (1024, 576)
-    image = pipe(prompt, width=width, height=height).images[0]
-    # Fixed output name: every call writes to the same file.
-    filename = "generated.png"
-    image.save(filename)
-    return filename
-# -------- Cinematic Pan Video --------
-def cinematic_video(image, ratio="9:16"):
-    """Build a 40-frame panning clip from the image file at path `image`.
-
-    The image is resized to 576x1024 for "9:16" (1024x576 otherwise), each
-    frame is a translated copy of it, and the result is written to
-    cinematic_pan.mp4 (mp4v codec, 12 fps). Returns that filename.
-    """
-    # (width, height) -- used for the resize, the warp output and the writer.
-    size = (576, 1024) if ratio=="9:16" else (1024, 576)
-    img = cv2.imread(image)
-    img = cv2.resize(img, size)
-    frames = []
-    for i in range(40):
-        # Shift grows with the frame index: 0.5 px/frame in x, 0.3 px/frame
-        # in y, truncated to whole pixels.
-        dx, dy = int(i*0.5), int(i*0.3)
-        # 2x3 affine matrix that only translates by (dx, dy).
-        M = np.float32([[1, 0, dx], [0, 1, dy]])
-        frame = cv2.warpAffine(img, M, size)
-        frames.append(frame)
-    out_video = "cinematic_pan.mp4"
-    out = cv2.VideoWriter(out_video, cv2.VideoWriter_fourcc(*'mp4v'), 12, size)
-    for f in frames:
-        out.write(f)
-    # Release the writer before returning the filename.
-    out.release()
-    return out_video
-# -------- Realistic AnimateDiff Video --------
-def realistic_video(image, ratio="9:16"):
-    """Run a ModelScope pipeline on `image` and return "realistic.mp4".
-
-    `ratio` is accepted but not used in the body.
-    """
-    # NOTE(review): the task is 'image-to-video' while the model id names a
-    # text-to-video synthesis model, and the section header says AnimateDiff
-    # -- confirm this task/model pairing is valid.
-    # The pipeline is constructed on every call.
-    text2video = pipeline('image-to-video', model='damo-vilab/modelscope-damo-text-to-video-synthesis', device=device)
-    result = text2video({'image': image})
-    video_path = result[OutputKeys.OUTPUT_VIDEO]
-    # Move the produced file to a fixed relative name.
-    os.rename(video_path, "realistic.mp4")
-    return "realistic.mp4"
-# -------- Wrapper: Text -> Videos --------
-def text_to_videos(prompt, ratio):
-    """Generate one image from `prompt`, then derive both videos from it.
-
-    Returns a 2-tuple: (result of cinematic_video, result of realistic_video).
-    """
-    img = text_to_image(prompt, ratio)
-    # Both video functions receive the same generated image file.
-    vid1 = cinematic_video(img, ratio)
-    vid2 = realistic_video(img, ratio)
-    return vid1, vid2
-# -------- Gradio UI --------
-# Layout: a prompt textbox, an aspect-ratio radio (default "16:9"), a button,
-# and two video outputs.
-with gr.Blocks() as demo:
-    gr.Markdown("## 🎬 Text → Video App (Cinematic + Realistic)")
-    prompt = gr.Textbox(label="Enter your text")
-    ratio = gr.Radio(["9:16", "16:9"], value="16:9", label="Aspect Ratio")
-    btn = gr.Button("Generate Videos")
-    out_vid1 = gr.Video(label="🎥 Cinematic Pan Motion")
-    out_vid2 = gr.Video(label="🎥 Realistic AnimateDiff")
-    # Clicking the button calls text_to_videos(prompt, ratio); its two return
-    # values go to the two video components in order.
-    btn.click(text_to_videos, [prompt, ratio], [out_vid1, out_vid2])
-# Start the app at import time (no __main__ guard).
-demo.launch()

 import gradio as gr
+import torch
+from diffusers import StableDiffusionPipeline, StableVideoDiffusionPipeline
+from diffusers.utils import export_to_video
+from PIL import Image
+# Load models
+text2img_pipe = StableDiffusionPipeline.from_pretrained(
+    "runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16
+).to("cuda")
+text2img_pipe.enable_model_cpu_offload()
+video_pipe = StableVideoDiffusionPipeline.from_pretrained(
+    "stabilityai/stable-video-diffusion-img2vid-xt", torch_dtype=torch.float16, variant="fp16"
+).to("cuda")
+video_pipe.enable_model_cpu_offload()
+def generate_video(prompt, aspect_ratio):
+    # Generate image
+    image = text2img_pipe(prompt).images[0]
+    if aspect_ratio == "16:9":
+        image = image.resize((1024, 576))
+    else:
+        image = image.resize((576, 1024))
+    # Generate video
+    generator = torch.manual_seed(42)
+    frames = video_pipe(image, decode_chunk_size=8, generator=generator).frames[0]
+    export_to_video(frames, "output.mp4", fps=7)
+    return "output.mp4"
+gr.Interface(
+    fn=generate_video,
+    inputs=[
+        gr.Textbox(label="Scene Prompt", placeholder="e.g. A 3D Islamic city at sunset, 4K"),
+        gr.Radio(["16:9", "9:16"], label="Aspect Ratio", value="16:9")
+    ],
+    outputs=gr.Video(label="Generated Video"),
+    title="Text → Image → Video Generator",
+    description="Enter a scene prompt and get a cinematic video. Powered by Stable Diffusion + Stable Video Diffusion."
+).launch()