hxssgaa committed on
Commit
f447b01
·
verified ·
1 Parent(s): c671e8c

Update video processor max pixels

Browse files
Files changed (1) hide show
  1. processing_colqwen3.py +14 -0
processing_colqwen3.py CHANGED
@@ -169,6 +169,20 @@ class ColQwen3Processor(ProcessorMixin):
169
  instance.image_processor.max_pixels = max_num_visual_tokens * tile * tile
170
  instance.image_processor.size["longest_edge"] = instance.image_processor.max_pixels
171
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
172
  return instance
173
 
174
  def __call__(
 
169
  instance.image_processor.max_pixels = max_num_visual_tokens * tile * tile
170
  instance.image_processor.size["longest_edge"] = instance.image_processor.max_pixels
171
 
172
+ video_patch_size = getattr(instance.video_processor, "patch_size", None)
173
+ video_merge_size = getattr(instance.video_processor, "merge_size", None) or getattr(
174
+ instance.video_processor, "spatial_merge_size", None
175
+ )
176
+ video_temporal_patch_size = getattr(instance.video_processor, "temporal_patch_size", None)
177
+ if video_patch_size is None or video_merge_size is None or video_temporal_patch_size is None:
178
+ raise ValueError(
179
+ "Qwen3VL video processor is missing `patch_size`, `merge_size`/`spatial_merge_size`, or `temporal_patch_size`."
180
+ )
181
+ video_tile = video_patch_size * video_merge_size
182
+ # Include temporal patching so the visual token cap applies across space and time.
183
+ instance.video_processor.max_pixels = max_num_visual_tokens * video_tile * video_tile * video_temporal_patch_size
184
+ instance.video_processor.size["longest_edge"] = instance.video_processor.max_pixels
185
+
186
  return instance
187
 
188
  def __call__(