# import io # import base64 # from typing import List, Dict, Tuple # from PIL import Image # from transformers import pipeline # from src.config import LOGO_DETECTION_MODEL # # -------------------------------------------------- # # MODEL INITIALIZATION (LOAD ONCE) # # -------------------------------------------------- # # Object detection pipeline for logo / seal detection # detector = pipeline( # task="object-detection", # model=LOGO_DETECTION_MODEL, # device=-1 # CPU # ) # # -------------------------------------------------- # # LOGO DETECTION # # -------------------------------------------------- # def detect_logos_from_bytes( # image_bytes: bytes, # resize: Tuple[int, int] = (1024, 1024), # max_logos: int = 3 # ) -> List[Dict[str, str | float]]: # """ # Detect logos or visual emblems from raw image bytes. # The function resizes the image for faster inference, # detects logo regions, crops them, and returns the # cropped logo images encoded in base64 along with # confidence scores. # Parameters # ---------- # image_bytes : bytes # Raw image data. # resize : tuple[int, int], optional # Maximum image size for inference (default: 1024x1024). # max_logos : int, optional # Maximum number of detected logos to return. # Returns # ------- # list[dict] # List of detected logos with: # - confidence: float # - image_base64: str # """ # # Load image from bytes # image: Image.Image = Image.open(io.BytesIO(image_bytes)).convert("RGB") # # Resize image for performance optimization # image.thumbnail(resize) # # Run object detection # detections = detector(image) # results: List[Dict[str, str | float]] = [] # # Process top detections only # for det in detections[:max_logos]: # box = det["box"] # score: float = float(det["score"]) # xmin: int = int(box["xmin"]) # ymin: int = int(box["ymin"]) # xmax: int = int(box["xmax"]) # ymax: int = int(box["ymax"]) # # Crop detected logo region # cropped = image.crop((xmin, ymin, xmax, ymax)) # # Convert cropped logo to base64 # buffer = io.BytesIO() # cropped.save(buffer, format="PNG") # results.append({ # "confidence": round(score, 3), # "image_base64": base64.b64encode(buffer.getvalue()).decode() # }) # return results import io import base64 from typing import List, Dict, Tuple from PIL import Image from transformers import pipeline from src.config import LOGO_DETECTION_MODEL # -------------------------------------------------- # MODEL INITIALIZATION (LOAD ONCE) # -------------------------------------------------- detector = pipeline( task="object-detection", model=LOGO_DETECTION_MODEL, device=-1 # CPU (HF Spaces safe) ) # -------------------------------------------------- # LOGO DETECTION FUNCTION # -------------------------------------------------- def detect_logos_from_bytes( image_bytes: bytes, resize: Tuple[int, int] = (1024, 1024), max_logos: int = 4, threshold: float = 0.2 ) -> List[Dict[str, str | float]]: """ Detect logos or visual emblems from raw image bytes. Returns cropped logo images (base64) with confidence scores. Works consistently on local & Hugging Face Spaces. """ # ------------------------------- # Load image (deterministic) # ------------------------------- image = Image.open(io.BytesIO(image_bytes)).convert("RGB") # Deterministic resize (NO thumbnail) image = image.resize( ( min(image.width, resize[0]), min(image.height, resize[1]) ) ) # ------------------------------- # Object detection (EXPLICIT threshold) # ------------------------------- detections = detector( image, threshold=threshold ) if not detections: return [] # ------------------------------- # Sort by confidence (IMPORTANT) # ------------------------------- detections = sorted( detections, key=lambda x: x["score"], reverse=True ) results: List[Dict[str, str | float]] = [] # ------------------------------- # Process top detections # ------------------------------- for det in detections[:max_logos]: box = det["box"] score = float(det["score"]) xmin = max(0, int(box["xmin"])) ymin = max(0, int(box["ymin"])) xmax = min(image.width, int(box["xmax"])) ymax = min(image.height, int(box["ymax"])) # Safety check if xmax <= xmin or ymax <= ymin: continue # Crop logo region cropped = image.crop((xmin, ymin, xmax, ymax)) # Encode cropped logo to base64 buffer = io.BytesIO() cropped.save(buffer, format="PNG") results.append({ "confidence": round(score, 3), "image_base64": base64.b64encode(buffer.getvalue()).decode("utf-8") }) return results