Update processing_granite_vision_embedding.py
Browse files
processing_granite_vision_embedding.py
CHANGED
|
@@ -23,7 +23,7 @@ def floor_by_factor(number: float, factor: int) -> int:
|
|
| 23 |
|
| 24 |
class GraniteVisionEmbProcessor(LlavaNextProcessor):
|
| 25 |
"""
|
| 26 |
-
Processor for
|
| 27 |
"""
|
| 28 |
|
| 29 |
visual_prompt_prefix: ClassVar[str] = "<|user|>\n<image>\nDescribe the image.\n"
|
|
@@ -300,7 +300,7 @@ class GraniteVisionEmbProcessor(LlavaNextProcessor):
|
|
| 300 |
images: List[Image.Image],
|
| 301 |
) -> BatchFeature:
|
| 302 |
"""
|
| 303 |
-
Process images
|
| 304 |
"""
|
| 305 |
# texts_doc = [self.apply_chat_template(self.format_data_wo_role(self.visual_prompt_prefix, img),tokenize=False ) for img in images]
|
| 306 |
texts_doc = [self.visual_prompt_prefix for _ in images]
|
|
@@ -394,7 +394,7 @@ class GraniteVisionEmbProcessor(LlavaNextProcessor):
|
|
| 394 |
) -> torch.Tensor:
|
| 395 |
"""
|
| 396 |
Compute the late-interaction/MaxSim score (ColBERT-like) for the given multi-vector
|
| 397 |
-
query embeddings (`qs`) and passage embeddings (`ps`). For
|
| 398 |
image of a document page.
|
| 399 |
|
| 400 |
Because the embedding tensors are multi-vector and can thus have different shapes, they
|
|
|
|
| 23 |
|
| 24 |
class GraniteVisionEmbProcessor(LlavaNextProcessor):
|
| 25 |
"""
|
| 26 |
+
Processor for GraniteVisionEmb.
|
| 27 |
"""
|
| 28 |
|
| 29 |
visual_prompt_prefix: ClassVar[str] = "<|user|>\n<image>\nDescribe the image.\n"
|
|
|
|
| 300 |
images: List[Image.Image],
|
| 301 |
) -> BatchFeature:
|
| 302 |
"""
|
| 303 |
+
Process images.
|
| 304 |
"""
|
| 305 |
# texts_doc = [self.apply_chat_template(self.format_data_wo_role(self.visual_prompt_prefix, img),tokenize=False ) for img in images]
|
| 306 |
texts_doc = [self.visual_prompt_prefix for _ in images]
|
|
|
|
| 394 |
) -> torch.Tensor:
|
| 395 |
"""
|
| 396 |
Compute the late-interaction/MaxSim score (ColBERT-like) for the given multi-vector
|
| 397 |
+
query embeddings (`qs`) and passage embeddings (`ps`). For us, a passage is the
|
| 398 |
image of a document page.
|
| 399 |
|
| 400 |
Because the embedding tensors are multi-vector and can thus have different shapes, they
|