Spaces:
Runtime error
Runtime error
| import fitz # PyMuPDF | |
| import gradio as gr | |
| import json | |
| from langchain_text_splitters import RecursiveCharacterTextSplitter | |
| from groq import Groq | |
| import os | |
# Load API key.
# The error message below tells users to set GROQ_API_KEY, so that name is
# read first; the legacy "Groq" variable is still honoured so deployments
# configured with the old name keep working.
API_KEY = os.getenv("GROQ_API_KEY") or os.getenv("Groq")
if not API_KEY:
    raise ValueError("API Key is missing! Set the environment variable 'GROQ_API_KEY'.")

# Initialize Groq Client (shared by every request handled by this module)
client = Groq(api_key=API_KEY)
# Prompt Template
# Filled with str.format: {json_structure} receives the target schema and
# {text} receives the script excerpt to analyse.
PROMPT_TEMPLATE = "\n".join(
    (
        "You are an expert screenplay analyst. Convert the following film script text into the JSON structure below:",
        "{json_structure}",
        "Script Text:",
        "{text}",
        "Provide only the JSON response.",
    )
)
# Define the JSON structure to be extracted.
# The template is assembled from its nested parts; key order is kept because
# the structure is serialised into the prompt.
_DIALOGUE_TEMPLATE = {
    "character": "",
    "dialogue_text": "",
    "tone": "",
}

_SCENE_TEMPLATE = {
    "scene_heading": "",
    "location": "",
    "time_of_day": "",
    "characters": [],
    "emotions": [],
    "summary": "",
    "dialogues": [_DIALOGUE_TEMPLATE],
}

_STORY_BEATS_TEMPLATE = {
    "setup": "",
    "inciting_incident": "",
    "climax": "",
    "resolution": "",
}

JSON_STRUCTURE = {
    "scenes": [_SCENE_TEMPLATE],
    "overall_emotional_arc": [],
    "story_beats": _STORY_BEATS_TEMPLATE,
}
| # Function to extract text from PDF | |
def extract_text_from_pdf(pdf_file):
    """Extract the plain text of every page of a PDF.

    Args:
        pdf_file: Either a filesystem path (``str`` or ``os.PathLike``) or an
            object that exposes the path through a ``name`` attribute, such
            as a temp-file wrapper. Which of the two the Gradio file input
            delivers depends on the installed Gradio version, so both are
            accepted.

    Returns:
        The text of all pages joined by newlines, with surrounding whitespace
        stripped. On any failure a message starting with
        ``"Error reading PDF: "`` is returned instead of raising, because the
        caller detects failures by that prefix.
    """
    if pdf_file is None:
        return "Error reading PDF: no file was provided"

    # A path-like object is used as-is; anything else is expected to carry
    # the path in ``.name``. (``Path.name`` is only the basename, so path
    # objects must not go through the attribute lookup.)
    if isinstance(pdf_file, (str, os.PathLike)):
        path = pdf_file
    else:
        path = getattr(pdf_file, "name", None)
        if not path:
            return "Error reading PDF: uploaded object has no file path"

    try:
        with open(path, "rb") as f:
            data = f.read()
        # The context manager closes the document even if a page fails.
        with fitz.open(stream=data, filetype="pdf") as doc:
            pages = [page.get_text() for page in doc]
    except Exception as e:  # boundary: report any reader failure to the UI
        return f"Error reading PDF: {e}"
    return "\n".join(pages).strip()
| # Function to split text into chunks | |
def split_text_into_chunks(text, chunk_size=2000, chunk_overlap=100):
    """Split *text* into overlapping chunks for per-chunk LLM calls.

    Args:
        text: The full text to split.
        chunk_size: Maximum size of each chunk, passed to the splitter.
        chunk_overlap: Amount of text shared between consecutive chunks,
            passed to the splitter. Should be smaller than ``chunk_size``.

    Returns:
        A list of chunk strings; an empty list when *text* is empty.
    """
    if not text:
        # Nothing to split; skip constructing the splitter.
        return []
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return splitter.split_text(text)
| # Function to call Groq API | |
def _extract_json_text(raw):
    """Return *raw* with a surrounding Markdown code fence removed, if any."""
    content = raw.strip()
    if content.startswith("```"):
        content = content[3:]
        if content.endswith("```"):
            content = content[:-3]
        # Drop an optional "json" language tag that follows the opening fence.
        if content[:4].lower() == "json":
            content = content[4:]
    return content.strip()


def call_llm_api(text, model="llama-3.3-70b-versatile"):
    """Ask the Groq chat API to convert a script excerpt into structured JSON.

    Args:
        text: The script excerpt inserted into ``PROMPT_TEMPLATE``.
        model: Groq model identifier. The previous hard-coded value
            ("llama-3-3-70b-vision") does not match Groq's model naming, so
            the default is now the documented "llama-3.3-70b-versatile".

    Returns:
        The parsed JSON object as a ``dict``. On any failure (API error,
        empty reply, invalid JSON, or a reply that is not a JSON object) a
        dict of the form ``{"error": "API call failed: ..."}`` is returned
        instead of raising.
    """
    prompt = PROMPT_TEMPLATE.format(
        json_structure=json.dumps(JSON_STRUCTURE, indent=2),
        text=text,
    )
    try:
        response = client.chat.completions.create(
            messages=[{"role": "user", "content": prompt}],
            model=model,
        )
        # The message content may be missing; treat that as an empty reply so
        # it is reported as a JSON error rather than an AttributeError.
        raw_content = response.choices[0].message.content or ""
        parsed = json.loads(_extract_json_text(raw_content))
    except Exception as e:  # boundary: every failure becomes an error dict
        return {"error": f"API call failed: {e}"}

    if not isinstance(parsed, dict):
        # Callers index the result by key, so a list or scalar is unusable.
        return {"error": "API call failed: response was not a JSON object"}
    return parsed
| # Function to merge JSON chunks | |
def merge_json_chunks(chunks):
    """Run the LLM on every chunk and merge the per-chunk results.

    Args:
        chunks: Iterable of script text chunks; each one is sent to
            ``call_llm_api``.

    Returns:
        A dict shaped like ``JSON_STRUCTURE``: ``scenes`` and
        ``overall_emotional_arc`` are the concatenation of all chunk results,
        and each ``story_beats`` entry holds the first non-empty value seen.
        When one or more chunks fail, their messages are listed under an
        additional ``"errors"`` key so failures are not silently hidden.
    """
    # Build fresh containers for every call. A shallow copy of JSON_STRUCTURE
    # would share the nested "story_beats" dict with the global template, so
    # beats from one upload would leak into the template and into later calls.
    combined_result = {
        **JSON_STRUCTURE,
        "scenes": [],
        "overall_emotional_arc": [],
        "story_beats": dict(JSON_STRUCTURE["story_beats"]),
    }
    story_beats = combined_result["story_beats"]
    errors = []

    for chunk in chunks:
        result = call_llm_api(chunk)
        if not isinstance(result, dict):
            errors.append("API call failed: response was not a JSON object")
            continue
        if "error" in result:
            errors.append(str(result["error"]))
            continue

        # Merge scenes (ignore malformed, non-list values)
        scenes = result.get("scenes")
        if isinstance(scenes, list):
            combined_result["scenes"].extend(scenes)

        # Merge emotional arc
        arc = result.get("overall_emotional_arc")
        if isinstance(arc, list):
            combined_result["overall_emotional_arc"].extend(arc)

        # Merge story beats only once (first valid value per beat wins)
        chunk_beats = result.get("story_beats")
        if isinstance(chunk_beats, dict):
            for beat in story_beats:
                if not story_beats[beat] and chunk_beats.get(beat):
                    story_beats[beat] = chunk_beats[beat]

    if errors:
        combined_result["errors"] = errors
    return combined_result
| # Gradio interface function | |
def gradio_interface(file):
    """Handle one upload: extract the PDF text, chunk it, and merge LLM output.

    Args:
        file: The uploaded file as delivered by the Gradio file input; it is
            passed unchanged to ``extract_text_from_pdf``.

    Returns:
        The merged extraction result, or ``{"error": message}`` when the PDF
        could not be read or contained no extractable text.
    """
    pdf_text = extract_text_from_pdf(file)
    # Match the full failure prefix: a script whose own text merely begins
    # with "Error" must not be mistaken for a read failure.
    if pdf_text.startswith("Error reading PDF:"):
        return {"error": pdf_text}
    if not pdf_text:
        # With no text there is nothing to send to the LLM; say so instead of
        # returning a blank structure.
        return {"error": "No extractable text found in the PDF."}

    chunks = split_text_into_chunks(pdf_text)
    return merge_json_chunks(chunks)
# Gradio UI: a single file upload in, the extracted JSON out.
_script_upload = gr.File(label="Upload Film Script PDF")

iface = gr.Interface(
    fn=gradio_interface,
    inputs=_script_upload,
    outputs="json",
    title="ScriptWhisper - Screenplay Structure & Emotion Extractor",
    description="Upload a screenplay PDF to extract scene structure, emotional arc, and story beats.",
)

# Launch the app only when executed as a script, not when imported.
if __name__ == "__main__":
    iface.launch()