Sabbirr12 committed on
Commit
cb4edcb
·
verified ·
1 Parent(s): 44dff12

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +40 -64
app.py CHANGED
@@ -1,65 +1,41 @@
1
- import torch, cv2, os, numpy as np
2
  import gradio as gr
3
- from diffusers import StableDiffusionPipeline
4
- from modelscope.pipelines import pipeline
5
- from modelscope.outputs import OutputKeys
6
-
7
- device = "cuda" if torch.cuda.is_available() else "cpu"
8
-
9
- # -------- Text -> Image --------
10
- def text_to_image(prompt, ratio="9:16"):
11
- pipe = StableDiffusionPipeline.from_pretrained(
12
- "stabilityai/stable-diffusion-2-1", torch_dtype=torch.float16
13
- ).to(device)
14
-
15
- width, height = (576, 1024) if ratio=="9:16" else (1024, 576)
16
- image = pipe(prompt, width=width, height=height).images[0]
17
- filename = "generated.png"
18
- image.save(filename)
19
- return filename
20
-
21
- # -------- Cinematic Pan Video --------
22
- def cinematic_video(image, ratio="9:16"):
23
- size = (576, 1024) if ratio=="9:16" else (1024, 576)
24
- img = cv2.imread(image)
25
- img = cv2.resize(img, size)
26
- frames = []
27
- for i in range(40):
28
- dx, dy = int(i*0.5), int(i*0.3)
29
- M = np.float32([[1, 0, dx], [0, 1, dy]])
30
- frame = cv2.warpAffine(img, M, size)
31
- frames.append(frame)
32
- out_video = "cinematic_pan.mp4"
33
- out = cv2.VideoWriter(out_video, cv2.VideoWriter_fourcc(*'mp4v'), 12, size)
34
- for f in frames:
35
- out.write(f)
36
- out.release()
37
- return out_video
38
-
39
- # -------- Realistic AnimateDiff Video --------
40
- def realistic_video(image, ratio="9:16"):
41
- text2video = pipeline('image-to-video', model='damo-vilab/modelscope-damo-text-to-video-synthesis', device=device)
42
- result = text2video({'image': image})
43
- video_path = result[OutputKeys.OUTPUT_VIDEO]
44
- os.rename(video_path, "realistic.mp4")
45
- return "realistic.mp4"
46
-
47
- # -------- Wrapper: Text -> Videos --------
48
- def text_to_videos(prompt, ratio):
49
- img = text_to_image(prompt, ratio)
50
- vid1 = cinematic_video(img, ratio)
51
- vid2 = realistic_video(img, ratio)
52
- return vid1, vid2
53
-
54
- # -------- Gradio UI --------
55
- with gr.Blocks() as demo:
56
- gr.Markdown("## 🎬 Text → Video App (Cinematic + Realistic)")
57
- prompt = gr.Textbox(label="Enter your text")
58
- ratio = gr.Radio(["9:16", "16:9"], value="16:9", label="Aspect Ratio")
59
- btn = gr.Button("Generate Videos")
60
- out_vid1 = gr.Video(label="🎥 Cinematic Pan Motion")
61
- out_vid2 = gr.Video(label="🎥 Realistic AnimateDiff")
62
-
63
- btn.click(text_to_videos, [prompt, ratio], [out_vid1, out_vid2])
64
-
65
- demo.launch()
 
 
1
  import gradio as gr
2
+ import torch
3
+ from diffusers import StableDiffusionPipeline, StableVideoDiffusionPipeline
4
+ from diffusers.utils import export_to_video
5
+ from PIL import Image
6
+
7
+ # Load models
8
+ text2img_pipe = StableDiffusionPipeline.from_pretrained(
9
+ "runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16
10
+ ).to("cuda")
11
+ text2img_pipe.enable_model_cpu_offload()
12
+
13
+ video_pipe = StableVideoDiffusionPipeline.from_pretrained(
14
+ "stabilityai/stable-video-diffusion-img2vid-xt", torch_dtype=torch.float16, variant="fp16"
15
+ ).to("cuda")
16
+ video_pipe.enable_model_cpu_offload()
17
+
18
+ def generate_video(prompt, aspect_ratio):
19
+ # Generate image
20
+ image = text2img_pipe(prompt).images[0]
21
+ if aspect_ratio == "16:9":
22
+ image = image.resize((1024, 576))
23
+ else:
24
+ image = image.resize((576, 1024))
25
+
26
+ # Generate video
27
+ generator = torch.manual_seed(42)
28
+ frames = video_pipe(image, decode_chunk_size=8, generator=generator).frames[0]
29
+ export_to_video(frames, "output.mp4", fps=7)
30
+ return "output.mp4"
31
+
32
+ gr.Interface(
33
+ fn=generate_video,
34
+ inputs=[
35
+ gr.Textbox(label="Scene Prompt", placeholder="e.g. A 3D Islamic city at sunset, 4K"),
36
+ gr.Radio(["16:9", "9:16"], label="Aspect Ratio", value="16:9")
37
+ ],
38
+ outputs=gr.Video(label="Generated Video"),
39
+ title="Text → Image → Video Generator",
40
+ description="Enter a scene prompt and get a cinematic video. Powered by Stable Diffusion + Stable Video Diffusion."
41
+ ).launch()