Spaces:
Sleeping
Sleeping
File size: 4,217 Bytes
cbbe76e 26ae6bd ebe7430 9c79679 cbbe76e 56d2f3b cbbe76e 26ae6bd cbbe76e 26ae6bd bf32df6 26ae6bd 25fa2d2 26ae6bd cbbe76e 7a3c832 cbbe76e 26ae6bd 4704563 26ae6bd a8eff0f 26ae6bd 742a487 26ae6bd 25fa2d2 26ae6bd cbbe76e 26ae6bd |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 |
from io import BytesIO
import json, re
import os
import base64
import requests
from fastapi import FastAPI, UploadFile, File
from fastapi.responses import JSONResponse
from fastapi.middleware.cors import CORSMiddleware
from PIL import Image
# ASGI application object; the title is what FastAPI shows in its generated docs.
app = FastAPI(title="GLM-4.1V-9B-Thinking")

# Allow cross-origin calls from any origin, with any method and any header,
# so a separately hosted frontend (Gradio/Spaces UI) can reach this API.
app.add_middleware(
    CORSMiddleware,
    allow_headers=["*"],
    allow_methods=["*"],
    allow_origins=["*"],
)
# Chat-completions endpoint that every extraction request is POSTed to.
API_URL = "https://router.huggingface.co/v1/chat/completions"

# Request headers shared by all upstream calls. The bearer token is read from
# the ``access_token`` environment variable when this module is imported, so
# a missing variable raises KeyError at import time.
HEADERS = {
    "Authorization": "Bearer " + os.environ["access_token"],
    "Content-Type": "application/json",
}
# Instruction text sent to the vision model as the "text" part of the user
# message in `extract`. It asks for a bare JSON array of {"item", "price"}
# objects; `extract` then searches the model reply for such an array.
PROMPT = """
You are an AI assistant. Extract item names and their prices from the following image.
Your task is to extract item names and their corresponding prices from the image provided.
Return ONLY a clean JSON array in this format:
[
{"item": "<item_name>", "price": "<price>"},
...
]
⚠️ Guidelines:
- Do not include any explanation or text before/after the JSON.
- Include only entries that have both item and price.
- Preserve original spellings and formatting from the image.
- If prices are written in ₹, Rs., or INR, keep the symbol as is.
- Handle both packaged labels (like chips or snacks) and printed/handwritten menus.
- If there are duplicates or unclear text, skip them.
Only return the final JSON output, No explanation.
Make sure each entry has both item and price, and preserve the original spelling.
"""
def resize_image(image: Image.Image, max_size=(1024, 1024)) -> Image.Image:
    """Shrink ``image`` to fit inside ``max_size`` and return it.

    The resize is done with ``Image.thumbnail``, which operates on the image
    object it is called on, so the returned value is the same object that
    was passed in (not a copy).

    Args:
        image: The PIL image to shrink.
        max_size: ``(width, height)`` bounding box handed to ``thumbnail``.

    Returns:
        The same ``image`` object after ``thumbnail`` has been applied.
    """
    bounding_box = max_size
    image.thumbnail(bounding_box)
    return image
async def encode_image_to_data_url(file: UploadFile=File(...)) -> str:
    """Turn an uploaded image into a base64 ``data:`` URL.

    The upload is decoded with Pillow, shrunk via ``resize_image``, re-encoded
    in its original format (``quality=80`` is passed to the encoder), and
    wrapped in a ``data:<mime>;base64,<payload>`` string.

    Args:
        file: The uploaded image file.

    Returns:
        A data URL containing the re-encoded image.

    Raises:
        PIL.UnidentifiedImageError: If the upload cannot be decoded as an
            image (raised by ``Image.open``).
    """
    raw_bytes = await file.read()
    image = Image.open(BytesIO(raw_bytes))

    # Remember the decoded format before any processing; it drives both the
    # re-encoding and the MIME type advertised in the data URL.
    source_format = image.format

    # Preprocessing
    image = resize_image(image)

    # Compress and convert to bytes
    buffered = BytesIO()
    image.save(buffered, quality=80, format=source_format)
    image_bytes = buffered.getvalue()

    # Encode to base64
    base64_image = base64.b64encode(image_bytes).decode("utf-8")

    # Prefer the MIME type of the format the bytes were actually encoded in.
    # The client-supplied content type can be missing or wrong (for example
    # "application/octet-stream"), which would mislabel the payload.
    mime_type = (
        Image.MIME.get(source_format)
        or file.content_type
        or "application/octet-stream"
    )
    return f"data:{mime_type};base64,{base64_image}"
@app.get("/")
def root():
    """Return a fixed status message showing that the API is reachable."""
    status = {"message": "GLM 4.1V API for menu extraction is running."}
    return status
@app.post("/extract/")
async def extract(file: UploadFile = File(...)):
    """Extract item/price pairs from an uploaded menu or label image.

    The image is converted to a data URL, sent together with ``PROMPT`` to the
    chat-completions endpoint at ``API_URL``, and the first JSON array of
    objects found in the model's reply is parsed and returned.

    Args:
        file: The uploaded image.

    Returns:
        A ``JSONResponse``:
        * 200 ``{"menu_items": [...]}`` on success;
        * 400 ``{"error": ...}`` if encoding the image or calling the model
          API fails, or the API response has an unexpected shape;
        * 404 ``{"error": ..., "model_response": ...}`` if the reply contains
          no JSON array;
        * 500 ``{"error": ..., "raw": ...}`` if the matched text is not valid
          JSON.
    """
    # Imported here so the handler is self-contained with respect to the
    # module's import block.
    from fastapi.concurrency import run_in_threadpool

    # (connect, read) timeouts in seconds. Without a timeout a stalled
    # upstream would hang the request indefinitely; the read timeout is
    # generous because the model may take a long time to answer.
    request_timeout = (10, 300)

    try:
        # Convert uploaded image to base64 URL format
        image_data_url = await encode_image_to_data_url(file)

        # Create chat-style payload
        payload = {
            "model": "zai-org/GLM-4.1V-9B-Thinking:novita",
            # Alternative model ids kept for reference:
            #   meta-llama/Llama-3.2-11B-Vision-Instruct:together
            #   meta-llama/Llama-4-Scout-17B-16E-Instruct:novita
            #   llama3.2-vision:11b
            "messages": [
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": PROMPT},
                        {"type": "image_url", "image_url": {"url": image_data_url}},
                    ],
                }
            ],
        }

        # `requests` is blocking; run it in a worker thread so the event loop
        # can keep serving other requests while the model is working.
        response = await run_in_threadpool(
            requests.post,
            API_URL,
            headers=HEADERS,
            json=payload,
            timeout=request_timeout,
        )
        result = response.json()
        print("result :", result)

        try:
            reply = result["choices"][0]["message"]["content"]
        except (KeyError, IndexError, TypeError):
            # Upstream error payloads have no "choices"; report the payload
            # rather than an opaque "'choices'" KeyError message.
            raise ValueError(
                f"Unexpected response from model API: {result!r}"
            ) from None

        # The regex search below requires text; reject anything else here so
        # it is reported as a handled error instead of an unhandled exception.
        if not isinstance(reply, str):
            raise TypeError("Model reply does not contain text content")
    except Exception as e:
        return JSONResponse(content={"error": str(e)}, status_code=400)

    # Find the first "[ {...} ... ]" span in the reply; the model may wrap
    # the JSON in additional text despite the prompt.
    match = re.search(r"\[\s*{.*?}\s*\]", reply, re.DOTALL)
    if not match:
        return JSONResponse(
            status_code=404,
            content={"error": "No JSON array found in response", "model_response": reply},
        )

    json_str = match.group(0)
    try:
        items = json.loads(json_str)
    except json.JSONDecodeError:
        return JSONResponse(
            status_code=500,
            content={"error": "Failed to parse JSON", "raw": json_str},
        )
    return JSONResponse(content={"menu_items": items})
|