import streamlit as st
import os
import cv2
import torch
from PIL import Image
from gtts import gTTS
from scenedetect import open_video, SceneManager, ContentDetector
from transformers import BlipProcessor, BlipForConditionalGeneration
from openai import OpenAI
import base64
import moviepy.editor as mp
# Load AI models
caption_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
caption_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")

# Read the OpenAI API key from the environment; never hardcode secrets in source
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
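# The key must be provided before the app starts, e.g. from the shell or your
# hosting platform's secrets store (the value shown is a placeholder):
#   export OPENAI_API_KEY="sk-..."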
# Streamlit App UI
st.title("🎥 AI-Powered Video Summarization")
# Define custom CSS
def set_background(image_file):
    with open(image_file, "rb") as image:
        encoded_string = base64.b64encode(image.read()).decode()
    st.markdown(
        f"""
        <style>
        .stApp {{
            background-image: url("data:image/jpg;base64,{encoded_string}");
            background-size: cover;
            background-position: center;
            background-repeat: no-repeat;
        }}
        </style>
        """,
        unsafe_allow_html=True
    )
# Set background
set_background("background.jpg")

uploaded_file = st.file_uploader("📤 Upload a Video File", type=["mp4"])

if uploaded_file:
    video_path = "input_video.mp4"
    with open(video_path, "wb") as f:
        f.write(uploaded_file.getbuffer())
    st.video(video_path)
    # Scene Detection & Frame Extraction
    st.write("🔍 Detecting scene changes and extracting key frames...")

    def extract_key_frames(video_path, output_folder="frames"):
        os.makedirs(output_folder, exist_ok=True)
        video = open_video(video_path)
        scene_manager = SceneManager()
        # ContentDetector marks a cut when the frame-to-frame content score
        # exceeds the threshold; 27.0 is PySceneDetect's default.
        scene_manager.add_detector(ContentDetector(threshold=27.0))
        scene_manager.detect_scenes(video)
        scenes = scene_manager.get_scene_list()
        cap = cv2.VideoCapture(video_path)
        for i, (start, end) in enumerate(scenes):
            frame_time = start.get_frames()  # frame index at the start of the scene
            cap.set(cv2.CAP_PROP_POS_FRAMES, frame_time)
            ret, frame = cap.read()
            if ret:
                frame_path = os.path.join(output_folder, f"scene_{i+1}.jpg")
                cv2.imwrite(frame_path, frame)
                print(f"Saved: {frame_path}")
        cap.release()

    extract_key_frames(video_path)
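    # If the detector finds no cuts (short or static clips), "frames" stays empty
    # and the captioning and summarization steps below would have nothing to
    # process. A minimal fallback, my addition rather than part of the original
    # app: save the first frame so the rest of the pipeline still runs.
    if not any(f.endswith(".jpg") for f in os.listdir("frames")):
        cap = cv2.VideoCapture(video_path)
        ret, frame = cap.read()
        if ret:
            cv2.imwrite(os.path.join("frames", "scene_1.jpg"), frame)
        cap.release()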
    # Caption Generation
    st.write("📝 Generating captions for extracted frames...")

    def generate_caption(image_path):
        image = Image.open(image_path).convert("RGB")
        inputs = caption_processor(image, return_tensors="pt")
        caption_ids = caption_model.generate(**inputs)
        return caption_processor.decode(caption_ids[0], skip_special_tokens=True)

    captions = []
    # Sort numerically so scene_10.jpg comes after scene_9.jpg
    # (plain sorted() orders lexicographically and would misplace it).
    frame_files = sorted(
        (f for f in os.listdir("frames") if f.endswith(".jpg")),
        key=lambda name: int(name.split("_")[1].split(".")[0]),
    )
    for filename in frame_files:
        image_path = os.path.join("frames", filename)
        captions.append(generate_caption(image_path))
    # st.write("📝 Generated Captions:", captions)
    # Summarization
    st.write("📝 Summarizing captions using AI...")

    def summarize_captions(captions):
        caption_text = "\n".join(captions)  # one caption per line reads better than a raw list repr
        prompt = f"Summarize the following sequence of video frames into a meaningful story under 800 characters:\n\n{caption_text}"
        completion = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[
                {"role": "system", "content": "You are an AI that summarizes video content."},
                {"role": "user", "content": prompt},
            ],
            max_tokens=200,
        )
        return completion.choices[0].message.content

    summary = summarize_captions(captions)
    st.write(summary)
    # Text-to-Speech
    st.write("🔊 Generating voice narration...")

    def text_to_speech(text, output_audio="summary_audio.mp3"):
        tts = gTTS(text, lang="en")
        tts.save(output_audio)

    text_to_speech(summary)
    st.audio("summary_audio.mp3")
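    # Note: gTTS synthesizes speech by calling Google Translate's online TTS
    # endpoint, so this step needs network access and can fail when offline
    # or rate-limited.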
    # Video summary
    st.write("🎬 Video Summary:")

    def create_summary_video(image_folder, output_video):
        images = sorted(
            (os.path.join(image_folder, img) for img in os.listdir(image_folder) if img.endswith(".jpg")),
            key=lambda path: int(os.path.basename(path).split("_")[1].split(".")[0]),  # numeric scene order
        )
        clips = [mp.ImageClip(img).set_duration(2) for img in images]  # 2 seconds per frame
        video = mp.concatenate_videoclips(clips, method="compose")
        video.write_videofile(output_video, fps=24)

    create_summary_video("frames", "summary_video.mp4")
    st.video("summary_video.mp4")
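    # Note: set_duration/set_audio and "import moviepy.editor" are the moviepy 1.x
    # API used throughout this app; moviepy 2.x renamed these methods to
    # with_duration/with_audio and dropped the moviepy.editor module, so pin
    # moviepy<2.0 for this code.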
    # # Combine Audio & Video
    # st.write("🎬 Merging audio with the video...")
    # def add_audio_to_video(video_path, audio_path, output_video="final_video.mp4"):
    #     video = mp.VideoFileClip(video_path)
    #     audio = mp.AudioFileClip(audio_path)
    #     if audio.duration > video.duration:
    #         audio = audio.subclip(0, video.duration)
    #     final_video = video.set_audio(audio)
    #     final_video.write_videofile(output_video, codec="libx264", audio_codec="aac")
    # add_audio_to_video("summary_video.mp4", "summary_audio.mp3")
    # st.video("final_video.mp4")
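    # A minimal working version of the disabled merge above, a sketch assuming
    # moviepy 1.x and an ffmpeg build on PATH, guarded so the app keeps running
    # if encoding fails in the hosting environment:
    st.write("🎬 Merging audio with the video...")
    try:
        final_clip = mp.VideoFileClip("summary_video.mp4")
        narration = mp.AudioFileClip("summary_audio.mp3")
        if narration.duration > final_clip.duration:
            narration = narration.subclip(0, final_clip.duration)  # trim narration to the video length
        final_clip.set_audio(narration).write_videofile("final_video.mp4", codec="libx264", audio_codec="aac")
        st.video("final_video.mp4")
    except Exception as exc:
        st.warning(f"Audio/video merge failed: {exc}")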