Spaces:

lyimo
/

gemma

Runtime error

App Files Community

lyimo committed on Jul 28

Commit

35283c1

verified ·

1 Parent(s): fa3f706

Update app.py

Browse files

Files changed (1) hide show

app.py +34 -68

app.py CHANGED Viewed

@@ -3,7 +3,8 @@ import gradio as gr
 from unsloth import FastLanguageModel
 import torch
 from PIL import Image
-from transformers import TextStreamer
 import os
 # --- Configuration ---
@@ -11,8 +12,7 @@ import os
 BASE_MODEL_NAME = "unsloth/gemma-3n-E4B-it"
 # 2. Your PEFT (LoRA) Model Name on Hugging Face Hub
-# Replace 'your-username' and 'your-model-repo-name' with your actual details
-PEFT_MODEL_NAME = "lyimo/mosquito-breeding-detection" # Or your Hugging Face repo path
 # 3. Max sequence length (should match or exceed training setting)
 MAX_SEQ_LENGTH = 2048
@@ -35,99 +35,63 @@ tokenizer = get_chat_template(tokenizer, chat_template="gemma-3")
 print("Model and tokenizer loaded successfully!")
 # --- Inference Function ---
 def analyze_image(image, prompt):
     """
-    Analyzes the image using the fine-tuned model.
     """
     if image is None:
         return "Please upload an image."
-    # Save the uploaded image temporarily (or pass the PIL object, see notes)
-    # Unsloth's tokenizer often expects the image path during apply_chat_template
-    # for multimodal inputs.
     temp_image_path = "temp_uploaded_image.jpg"
     try:
-        image.save(temp_image_path) # Save PIL image from Gradio
-        # Construct messages
         messages = [
             {
                 "role": "user",
                 "content": [
-                    {"type": "image", "image": temp_image_path}, # Pass the temporary path
                     {"type": "text", "text": prompt}
                 ]
             }
         ]
-        # Apply chat template
         full_prompt = tokenizer.apply_chat_template(
             messages,
             tokenize=False,
             add_generation_prompt=True
         )
-        # Tokenize inputs
         inputs = tokenizer(
             full_prompt,
             return_tensors="pt",
         ).to(model.device)
-        # --- Generation ---
-        # Collect the output text
-        output_text = ""
-        def text_collector(text):
-            nonlocal output_text
-            output_text += text
-        # Create a custom streamer to capture text
-        class GradioTextStreamer:
-            def __init__(self, tokenizer, callback=None):
-                self.tokenizer = tokenizer
-                self.callback = callback
-                self.token_cache = []
-                self.print_len = 0
-            def put(self, value):
-                if self.callback:
-                    # Decode the current token(s)
-                    self.token_cache.extend(value.tolist())
-                    text = self.tokenizer.decode(self.token_cache, skip_special_tokens=True)
-                    # Call the callback with the new text
-                    self.callback(text[len(output_text):]) # Send only the new part
-                    # Update output_text locally to track progress
-                    nonlocal output_text
-                    output_text = text
-            def end(self):
-                if self.callback:
-                   # Ensure any remaining text is sent
-                   self.callback("") # Signal end, or send final text if needed differently
-                   self.token_cache = []
-                   self.print_len = 0
-        streamer = GradioTextStreamer(tokenizer, callback=text_collector)
-        # Start generation in a separate thread to allow streaming
-        import threading
-        def generate_text():
-            _ = model.generate(
-                **inputs,
-                max_new_tokens=1024,
-                streamer=streamer,
-                # You can add other generation parameters here
-                # temperature=0.7,
-                # top_p=0.95,
-                # do_sample=True
-            )
-            # Signal completion after generation finishes
-            yield output_text # Final yield to ensure completeness
-        # Yield initial output and then stream updates
-        yield output_text # Initial empty or partial output
-        for _ in generate_text(): # This loop runs the generation
-            yield output_text # Yield updated text as it's generated
     except Exception as e:
         error_msg = f"An error occurred during processing: {str(e)}"
@@ -138,6 +102,7 @@ def analyze_image(image, prompt):
         if os.path.exists(temp_image_path):
             os.remove(temp_image_path)
 # --- Gradio Interface ---
 with gr.Blocks() as demo:
     gr.Markdown("# 🦟 Mosquito Breeding Site Detector")
@@ -155,13 +120,14 @@ with gr.Blocks() as demo:
             output_text = gr.Textbox(label="Analysis Result", interactive=False, lines=15)
     # Connect the button to the function
     submit_btn.click(
         fn=analyze_image,
         inputs=[image_input, prompt_input],
-        outputs=output_text, # Stream to the textbox
-        streaming=True # Enable streaming output
     )
 # Launch the app
 if __name__ == "__main__":
-    demo.launch()

 from unsloth import FastLanguageModel
 import torch
 from PIL import Image
+from transformers import TextIteratorStreamer
+from threading import Thread
 import os
 # --- Configuration ---
 BASE_MODEL_NAME = "unsloth/gemma-3n-E4B-it"
 # 2. Your PEFT (LoRA) Model Name on Hugging Face Hub
+PEFT_MODEL_NAME = "lyimo/mosquito-breeding-detection"
 # 3. Max sequence length (should match or exceed training setting)
 MAX_SEQ_LENGTH = 2048
 print("Model and tokenizer loaded successfully!")
 # --- Inference Function ---
 def analyze_image(image, prompt):
     """
+    Analyzes the image using the fine-tuned model and streams the output.
     """
     if image is None:
         return "Please upload an image."
     temp_image_path = "temp_uploaded_image.jpg"
     try:
+        image.save(temp_image_path)
         messages = [
             {
                 "role": "user",
                 "content": [
+                    {"type": "image", "image": temp_image_path},
                     {"type": "text", "text": prompt}
                 ]
             }
         ]
         full_prompt = tokenizer.apply_chat_template(
             messages,
             tokenize=False,
             add_generation_prompt=True
         )
         inputs = tokenizer(
             full_prompt,
             return_tensors="pt",
         ).to(model.device)
+        # Use TextIteratorStreamer for simpler, more robust streaming
+        streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
+        # Define generation arguments
+        generation_kwargs = dict(
+            **inputs,
+            streamer=streamer,
+            max_new_tokens=1024,
+            # You can add other generation parameters here
+            # temperature=0.7,
+            # top_p=0.95,
+            # do_sample=True
+        )
+        # Run generation in a separate thread to avoid blocking the UI
+        thread = Thread(target=model.generate, kwargs=generation_kwargs)
+        thread.start()
+        # Yield the generated text as it becomes available
+        generated_text = ""
+        for new_text in streamer:
+            generated_text += new_text
+            yield generated_text
     except Exception as e:
         error_msg = f"An error occurred during processing: {str(e)}"
         if os.path.exists(temp_image_path):
             os.remove(temp_image_path)
 # --- Gradio Interface ---
 with gr.Blocks() as demo:
     gr.Markdown("# 🦟 Mosquito Breeding Site Detector")
             output_text = gr.Textbox(label="Analysis Result", interactive=False, lines=15)
     # Connect the button to the function
+    # The 'streaming=True' flag in Gradio 3 is deprecated. The streaming behavior
+    # is now automatically handled by using a generator function (with 'yield').
     submit_btn.click(
         fn=analyze_image,
         inputs=[image_input, prompt_input],
+        outputs=output_text
     )
 # Launch the app
 if __name__ == "__main__":
+    demo.launch()