import torch
from transformers import Blip2Processor, Blip2ForConditionalGeneration
from PIL import Image
import gradio as gr
from io import BytesIO
import requests

# -----------------------------
# Device and Model Setup
# -----------------------------
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

# Use half precision on GPU to reduce memory use; full precision on CPU.
dtype = torch.float16 if device == "cuda" else torch.float32

model_name = "Salesforce/blip2-flan-t5-xl-coco"  # public model
processor = Blip2Processor.from_pretrained(model_name)
model = Blip2ForConditionalGeneration.from_pretrained(model_name, torch_dtype=dtype)
model.to(device)

# -----------------------------
# Inference Function
# -----------------------------
def analyze_eo_image(image, question):
    if image is None or not question.strip():
        return "Please upload an EO image and ask a question."
    if image.mode != "RGB":
        image = image.convert("RGB")

    # BLIP-2 VQA-style prompt format ("Question: ... Answer:").
    prompt = f"Question: {question.strip()} Answer:"

    # Cast inputs to the model's dtype so float16 weights and inputs match on GPU.
    inputs = processor(images=image, text=prompt, return_tensors="pt").to(device, dtype)

    out = model.generate(
        **inputs,
        max_new_tokens=80,
        do_sample=True,
        temperature=0.7,
    )
    return processor.decode(out[0], skip_special_tokens=True).strip()

# -----------------------------
# Optional: URL input
# -----------------------------
def analyze_eo_url(url, question):
    try:
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        image = Image.open(BytesIO(response.content)).convert("RGB")
        return analyze_eo_image(image, question)
    except Exception as e:
        return f"Error loading image: {e}"

# -----------------------------
# Beautiful Gradio Layout
# -----------------------------
with gr.Blocks(title="🌍 EO Image Analysis") as demo:
    gr.Markdown(
        """
        # 🌍 Earth Observation Image Analysis
        Ask questions about EO images using a multimodal AI model. Powered by BLIP-2 + FLAN-T5.

        **Examples:** "Identify land cover types", "Where is the river?", "Has urban area expanded?"
        """
    )

    with gr.Tabs():
        with gr.Tab("Upload Image"):
            with gr.Row():
                with gr.Column(scale=1):
                    img_input = gr.Image(type="pil", label="Upload EO Image")
                    question_input = gr.Textbox(
                        label="Ask a question about the image",
                        placeholder="E.g. Where is the river?",
                    )
                    submit_btn = gr.Button("Analyze 🌟")
                with gr.Column(scale=1):
                    output_text = gr.Textbox(label="AI Answer", interactive=False)
            submit_btn.click(
                analyze_eo_image,
                inputs=[img_input, question_input],
                outputs=output_text,
            )

        with gr.Tab("Use Image URL"):
            with gr.Row():
                with gr.Column(scale=1):
                    url_input = gr.Textbox(label="Enter Image URL")
                    url_question = gr.Textbox(label="Ask a question about the image")
                    url_btn = gr.Button("Analyze 🌟")
                with gr.Column(scale=1):
                    url_output = gr.Textbox(label="AI Answer", interactive=False)
            url_btn.click(
                analyze_eo_url,
                inputs=[url_input, url_question],
                outputs=url_output,
            )

    gr.Markdown(
        "💡 Tip: Use clear, simple questions for best results. Supports natural language queries about EO images."
    )

demo.launch(share=True)