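The Space runs the Gradio app below: it downloads the int8-quantized Moondream 0.5B weights from the Hub at startup, then either answers a question about an uploaded image or, when no question is given, captions it.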
```python
import gradio as gr
from PIL import Image
import moondream as md
from huggingface_hub import hf_hub_download

# Download the quantized model weights at runtime
model_path = hf_hub_download(
    repo_id="andito/moondream05",
    filename="moondream-0_5b-int8.mf",
)
model = md.vl(model=model_path)


def model_inference(input_dict, history):
    # Extract the image from the message if present
    if input_dict.get("files"):
        image_path = input_dict["files"][0]
        if isinstance(image_path, dict) and "path" in image_path:
            image_path = image_path["path"]
        image = Image.open(image_path)
        encoded_image = model.encode_image(image)

        # If there's a question, answer it with query;
        # otherwise generate a caption
        text = input_dict.get("text", "")
        if text not in ["", "Caption"]:
            response = model.query(encoded_image, text)["answer"]
        else:
            response = model.caption(encoded_image)["caption"]
        return response
    else:
        return "Please provide an image to analyze."


examples = [
    [{"text": "Caption", "files": ["example_images/demo-1.jpg"]}, []],
    [{"text": "Caption", "files": ["example_images/demo-2.jpg"]}, []],
    [{"text": "What art era does this artwork belong to?", "files": ["example_images/rococo.jpg"]}, []],
    [{"text": "Caption", "files": ["example_images/rococo.jpg"]}, []],
    [{"text": "I'm planning a visit to this temple, give me travel tips.", "files": ["example_images/examples_wat_arun.jpg"]}, []],
    [{"text": "Caption", "files": ["example_images/examples_wat_arun.jpg"]}, []],
    [{"text": "Caption", "files": ["example_images/aaron.jpeg"]}, []],
]

demo = gr.ChatInterface(
    fn=model_inference,
    title="Moondream 0.5B: The World's Smallest Vision-Language Model",
    description="Play with [Moondream 0.5B](https://huggingface.co/vikhyatk/moondream2) in this demo. To get started, upload an image and text or try one of the examples.",
    examples=examples,
    textbox=gr.MultimodalTextbox(label="Query Input", file_types=["image"], file_count="single"),
    stop_btn="Stop Generation",
    multimodal=True,
    additional_inputs=[],
    cache_examples=False,
)
demo.launch(debug=True)
```
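To try this locally, install the dependencies the script imports — `gradio`, `moondream`, `pillow`, and `huggingface_hub` (versions are not pinned here) — and run the file (e.g. `python app.py`, assuming that is the filename); Gradio will print a local URL for the chat interface. The example images referenced above are assets bundled with the Space, so either copy them into an `example_images/` folder or remove the `examples` list when running elsewhere.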