import fitz  # PyMuPDF
import gradio as gr
import json
import copy
import os
from langchain_text_splitters import RecursiveCharacterTextSplitter
from groq import Groq

# Load API key
API_KEY = os.getenv("GROQ_API_KEY")
if not API_KEY:
    raise ValueError("API Key is missing! Set the environment variable 'GROQ_API_KEY'.")

# Initialize Groq client
client = Groq(api_key=API_KEY)

# Prompt template
PROMPT_TEMPLATE = """
You are an expert screenplay analyst. Convert the following film script text into the JSON structure below:

{json_structure}

Script Text:
{text}

Provide only the JSON response.
""".strip()

# Define the JSON structure to be extracted
JSON_STRUCTURE = {
    "scenes": [
        {
            "scene_heading": "",
            "location": "",
            "time_of_day": "",
            "characters": [],
            "emotions": [],
            "summary": "",
            "dialogues": [
                {
                    "character": "",
                    "dialogue_text": "",
                    "tone": ""
                }
            ]
        }
    ],
    "overall_emotional_arc": [],
    "story_beats": {
        "setup": "",
        "inciting_incident": "",
        "climax": "",
        "resolution": ""
    }
}


# Function to extract text from a PDF
def extract_text_from_pdf(pdf_file):
    text = ""
    try:
        # gr.File may hand us a file path (str) or a tempfile-like object with a .name attribute
        pdf_path = pdf_file if isinstance(pdf_file, str) else pdf_file.name
        with fitz.open(pdf_path) as doc:
            for page in doc:
                text += page.get_text() + "\n"
    except Exception as e:
        return f"Error reading PDF: {e}"
    return text.strip()


# Function to split text into chunks
def split_text_into_chunks(text, chunk_size=2000):
    splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=100)
    return splitter.split_text(text)


# Function to call the Groq API for a single chunk
def call_llm_api(text):
    prompt = PROMPT_TEMPLATE.format(json_structure=json.dumps(JSON_STRUCTURE, indent=2), text=text)
    try:
        response = client.chat.completions.create(
            messages=[{"role": "user", "content": prompt}],
            model="llama-3.3-70b-versatile",  # swap in another Groq-hosted model if preferred
        )
        raw_content = response.choices[0].message.content.strip()

        # Strip Markdown code fences if the model wraps its JSON output
        if raw_content.startswith("```json") and raw_content.endswith("```"):
            raw_content = raw_content[7:-3].strip()
        elif raw_content.startswith("```") and raw_content.endswith("```"):
            raw_content = raw_content[3:-3].strip()

        return json.loads(raw_content)
    except Exception as e:
        return {"error": f"API call failed: {e}"}


# Function to merge per-chunk JSON results into a single document
def merge_json_chunks(chunks):
    # Deep-copy the template so merging never mutates JSON_STRUCTURE itself
    combined_result = copy.deepcopy(JSON_STRUCTURE)
    combined_result["scenes"] = []
    combined_result["overall_emotional_arc"] = []

    for chunk in chunks:
        result = call_llm_api(chunk)
        if not isinstance(result, dict) or "error" in result:
            continue

        # Merge scenes
        if "scenes" in result:
            combined_result["scenes"].extend(result["scenes"])

        # Merge emotional arc
        if "overall_emotional_arc" in result:
            combined_result["overall_emotional_arc"].extend(result["overall_emotional_arc"])

        # Merge story beats only once (first time we encounter valid values)
        for beat in combined_result["story_beats"]:
            if result.get("story_beats", {}).get(beat) and not combined_result["story_beats"][beat]:
                combined_result["story_beats"][beat] = result["story_beats"][beat]

    return combined_result


# Gradio interface function
def gradio_interface(file):
    pdf_text = extract_text_from_pdf(file)
    if pdf_text.startswith("Error"):
        return {"error": pdf_text}

    chunks = split_text_into_chunks(pdf_text)
    extracted_data = merge_json_chunks(chunks)
    return extracted_data


# Gradio UI
iface = gr.Interface(
    fn=gradio_interface,
    inputs=gr.File(label="Upload Film Script PDF"),
    outputs="json",
    title="ScriptWhisper - Screenplay Structure & Emotion Extractor",
    description="Upload a screenplay PDF to extract scene structure, emotional arc, and story beats.",
)

# Launch the app
if __name__ == "__main__":
    iface.launch()