Fabrice-TIERCELIN committed (verified) · Commit c24bf2e · 1 parent: 0e28f69

Upload 11 files

.gitattributes CHANGED
@@ -64,3 +64,9 @@ RealESRGAN_examples/Example1.mp4 filter=lfs diff=lfs merge=lfs -text
 cat.png filter=lfs diff=lfs merge=lfs -text
 flowers.png filter=lfs diff=lfs merge=lfs -text
 monster.png filter=lfs diff=lfs merge=lfs -text
+capyabara_zoomed.png filter=lfs diff=lfs merge=lfs -text
+capyabara.webp filter=lfs diff=lfs merge=lfs -text
+poli_tower.png filter=lfs diff=lfs merge=lfs -text
+squatting_sonic.png filter=lfs diff=lfs merge=lfs -text
+tower_takes_off.png filter=lfs diff=lfs merge=lfs -text
+ugly_sonic.jpeg filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -1,21 +1,12 @@
 ---
-title: FramePack/HunyuanVideo
-emoji: 🎥
-colorFrom: pink
+title: Wan 2 2 First Last Frame
+emoji: 💻
+colorFrom: purple
 colorTo: gray
 sdk: gradio
-sdk_version: 5.29.1
+sdk_version: 5.44.1
 app_file: app.py
-license: apache-2.0
-short_description: Text-to-Video/Image-to-Video/Video extender (timed prompt)
-tags:
-- Image-to-Video
-- Image-2-Video
-- Img-to-Vid
-- Img-2-Vid
-- language models
-- LLMs
-suggested_hardware: zero-a10g
+pinned: false
 ---
 
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py CHANGED
The diff for this file is too large to render. See raw diff
 
capyabara.webp ADDED

Git LFS Details

  • SHA256: 26f8ee938a1f453a81e85c2035e3787b1e5ddbb9a92acb01688b39abd987c1e8
  • Pointer size: 131 Bytes
  • Size of remote file: 467 kB
capyabara_zoomed.png ADDED

Git LFS Details

  • SHA256: 37c27e972f09ab9b1c7df8aaa4b7c2cdbb702466e5bb0fecf5cb502ee531a26c
  • Pointer size: 132 Bytes
  • Size of remote file: 1.58 MB
optimization.py ADDED
@@ -0,0 +1,133 @@
+"""
+"""
+
+from typing import Any
+from typing import Callable
+from typing import ParamSpec
+
+import spaces
+import torch
+from torch.utils._pytree import tree_map_only
+from torchao.quantization import quantize_
+from torchao.quantization import Float8DynamicActivationFloat8WeightConfig
+from torchao.quantization import Int8WeightOnlyConfig
+
+from optimization_utils import capture_component_call
+from optimization_utils import aoti_compile
+from optimization_utils import drain_module_parameters
+
+
+P = ParamSpec('P')
+
+# --- CORRECTED DYNAMIC SHAPING ---
+
+# VAE temporal scale factor is 1, latent_frames = num_frames. Range is [8, 81].
+LATENT_FRAMES_DIM = torch.export.Dim('num_latent_frames', min=8, max=81)
+
+# The transformer has a patch_size of (1, 2, 2), which means the input latent height and width
+# are effectively divided by 2. This creates constraints that fail if the symbolic tracer
+# assumes odd numbers are possible.
+#
+# To solve this, we define the dynamic dimension for the *patched* (i.e., post-division) size,
+# and then express the input shape as 2 * this dimension. This mathematically guarantees
+# to the compiler that the input latent dimensions are always even, satisfying the constraints.
+
+# App range for pixel dimensions: [480, 832]. VAE scale factor is 8.
+# Latent dimension range: [480/8, 832/8] = [60, 104].
+# Patched latent dimension range: [60/2, 104/2] = [30, 52].
+LATENT_PATCHED_HEIGHT_DIM = torch.export.Dim('latent_patched_height', min=30, max=52)
+LATENT_PATCHED_WIDTH_DIM = torch.export.Dim('latent_patched_width', min=30, max=52)
+
+# Now, we define the dynamic shapes for the transformer's `hidden_states` input,
+# which has the shape (batch_size, channels, num_frames, height, width).
+TRANSFORMER_DYNAMIC_SHAPES = {
+    'hidden_states': {
+        2: LATENT_FRAMES_DIM,
+        3: 2 * LATENT_PATCHED_HEIGHT_DIM,  # Guarantees even height
+        4: 2 * LATENT_PATCHED_WIDTH_DIM,   # Guarantees even width
+    },
+}
+
+# --- END OF CORRECTION ---
+
+
+INDUCTOR_CONFIGS = {
+    'conv_1x1_as_mm': True,
+    'epilogue_fusion': False,
+    'coordinate_descent_tuning': True,
+    'coordinate_descent_check_all_directions': True,
+    'max_autotune': True,
+    'triton.cudagraphs': True,
+}
+
+
+def optimize_pipeline_(pipeline: Callable[P, Any], *args: P.args, **kwargs: P.kwargs):
+
+    @spaces.GPU(duration=1500)
+    def compile_transformer():
+
+        # This LoRA fusion part remains the same
+        pipeline.load_lora_weights(
+            "Kijai/WanVideo_comfy",
+            weight_name="Lightx2v/lightx2v_I2V_14B_480p_cfg_step_distill_rank128_bf16.safetensors",
+            adapter_name="lightx2v"
+        )
+        kwargs_lora = {}
+        kwargs_lora["load_into_transformer_2"] = True
+        pipeline.load_lora_weights(
+            "Kijai/WanVideo_comfy",
+            weight_name="Lightx2v/lightx2v_I2V_14B_480p_cfg_step_distill_rank128_bf16.safetensors",
+            adapter_name="lightx2v_2", **kwargs_lora
+        )
+        pipeline.set_adapters(["lightx2v", "lightx2v_2"], adapter_weights=[1., 1.])
+        pipeline.fuse_lora(adapter_names=["lightx2v"], lora_scale=3., components=["transformer"])
+        pipeline.fuse_lora(adapter_names=["lightx2v_2"], lora_scale=1., components=["transformer_2"])
+        pipeline.unload_lora_weights()
+
+        # Capture a single call to get the args/kwargs structure
+        with capture_component_call(pipeline, 'transformer') as call:
+            pipeline(*args, **kwargs)
+
+        dynamic_shapes = tree_map_only((torch.Tensor, bool), lambda t: None, call.kwargs)
+        dynamic_shapes |= TRANSFORMER_DYNAMIC_SHAPES
+
+        # Quantization remains the same
+        quantize_(pipeline.transformer, Float8DynamicActivationFloat8WeightConfig())
+        quantize_(pipeline.transformer_2, Float8DynamicActivationFloat8WeightConfig())
+
+        # --- SIMPLIFIED COMPILATION ---
+
+        exported_1 = torch.export.export(
+            mod=pipeline.transformer,
+            args=call.args,
+            kwargs=call.kwargs,
+            dynamic_shapes=dynamic_shapes,
+        )
+
+        exported_2 = torch.export.export(
+            mod=pipeline.transformer_2,
+            args=call.args,
+            kwargs=call.kwargs,
+            dynamic_shapes=dynamic_shapes,
+        )
+
+        compiled_1 = aoti_compile(exported_1, INDUCTOR_CONFIGS)
+        compiled_2 = aoti_compile(exported_2, INDUCTOR_CONFIGS)
+
+        # Return the two compiled models
+        return compiled_1, compiled_2
+
+    # Quantize text encoder (same as before)
+    quantize_(pipeline.text_encoder, Int8WeightOnlyConfig())
+
+    # Get the two dynamically-shaped compiled models
+    compiled_transformer_1, compiled_transformer_2 = compile_transformer()
+
+    # --- SIMPLIFIED ASSIGNMENT ---
+
+    pipeline.transformer.forward = compiled_transformer_1
+    drain_module_parameters(pipeline.transformer)
+
+    pipeline.transformer_2.forward = compiled_transformer_2
+    drain_module_parameters(pipeline.transformer_2)
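The key idea in optimization.py above is how TRANSFORMER_DYNAMIC_SHAPES encodes the "height and width are always even" constraint: export over the patched (half-size) dimensions and declare the traced dimensions as 2 * Dim. Below is a minimal, self-contained sketch of that torch.export pattern on a toy module; ToyPatchify, the channel count and the test shapes are illustrative stand-ins, not the Wan transformer.

import torch

class ToyPatchify(torch.nn.Module):
    # Mimics a (1, 2, 2) patchify: the spatial dims must be even for the split to work.
    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
        b, c, f, h, w = hidden_states.shape
        return hidden_states.reshape(b, c, f, h // 2, 2, w // 2, 2).mean(dim=(4, 6))

frames = torch.export.Dim('frames', min=8, max=81)
half_h = torch.export.Dim('half_h', min=30, max=52)
half_w = torch.export.Dim('half_w', min=30, max=52)

# Declaring dims 3 and 4 as 2 * half_h and 2 * half_w tells the exporter they are
# always even, so the h // 2 and w // 2 splits never trigger a failing guard.
example = torch.randn(1, 16, 21, 60, 104)
exported = torch.export.export(
    ToyPatchify(),
    args=(example,),
    dynamic_shapes={'hidden_states': {2: frames, 3: 2 * half_h, 4: 2 * half_w}},
)

# Any even spatial size in range is now accepted without re-exporting.
print(exported.module()(torch.randn(1, 16, 33, 68, 88)).shape)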
optimization_utils.py ADDED
@@ -0,0 +1,107 @@
+"""
+"""
+import contextlib
+from contextvars import ContextVar
+from io import BytesIO
+from typing import Any
+from typing import cast
+from unittest.mock import patch
+
+import torch
+from torch._inductor.package.package import package_aoti
+from torch.export.pt2_archive._package import AOTICompiledModel
+from torch.export.pt2_archive._package_weights import Weights
+
+
+INDUCTOR_CONFIGS_OVERRIDES = {
+    'aot_inductor.package_constants_in_so': False,
+    'aot_inductor.package_constants_on_disk': True,
+    'aot_inductor.package': True,
+}
+
+
+class ZeroGPUWeights:
+    def __init__(self, constants_map: dict[str, torch.Tensor], to_cuda: bool = False):
+        if to_cuda:
+            self.constants_map = {name: tensor.to('cuda') for name, tensor in constants_map.items()}
+        else:
+            self.constants_map = constants_map
+    def __reduce__(self):
+        constants_map: dict[str, torch.Tensor] = {}
+        for name, tensor in self.constants_map.items():
+            tensor_ = torch.empty_like(tensor, device='cpu').pin_memory()
+            constants_map[name] = tensor_.copy_(tensor).detach().share_memory_()
+        return ZeroGPUWeights, (constants_map, True)
+
+
+class ZeroGPUCompiledModel:
+    def __init__(self, archive_file: torch.types.FileLike, weights: ZeroGPUWeights):
+        self.archive_file = archive_file
+        self.weights = weights
+        self.compiled_model: ContextVar[AOTICompiledModel | None] = ContextVar('compiled_model', default=None)
+    def __call__(self, *args, **kwargs):
+        if (compiled_model := self.compiled_model.get()) is None:
+            compiled_model = cast(AOTICompiledModel, torch._inductor.aoti_load_package(self.archive_file))
+            compiled_model.load_constants(self.weights.constants_map, check_full_update=True, user_managed=True)
+            self.compiled_model.set(compiled_model)
+        return compiled_model(*args, **kwargs)
+    def __reduce__(self):
+        return ZeroGPUCompiledModel, (self.archive_file, self.weights)
+
+
+def aoti_compile(
+    exported_program: torch.export.ExportedProgram,
+    inductor_configs: dict[str, Any] | None = None,
+):
+    inductor_configs = (inductor_configs or {}) | INDUCTOR_CONFIGS_OVERRIDES
+    gm = cast(torch.fx.GraphModule, exported_program.module())
+    assert exported_program.example_inputs is not None
+    args, kwargs = exported_program.example_inputs
+    artifacts = torch._inductor.aot_compile(gm, args, kwargs, options=inductor_configs)
+    archive_file = BytesIO()
+    files: list[str | Weights] = [file for file in artifacts if isinstance(file, str)]
+    package_aoti(archive_file, files)
+    weights, = (artifact for artifact in artifacts if isinstance(artifact, Weights))
+    zerogpu_weights = ZeroGPUWeights({name: weights.get_weight(name)[0] for name in weights})
+    return ZeroGPUCompiledModel(archive_file, zerogpu_weights)
+
+
+@contextlib.contextmanager
+def capture_component_call(
+    pipeline: Any,
+    component_name: str,
+    component_method='forward',
+):
+
+    class CapturedCallException(Exception):
+        def __init__(self, *args, **kwargs):
+            super().__init__()
+            self.args = args
+            self.kwargs = kwargs
+
+    class CapturedCall:
+        def __init__(self):
+            self.args: tuple[Any, ...] = ()
+            self.kwargs: dict[str, Any] = {}
+
+    component = getattr(pipeline, component_name)
+    captured_call = CapturedCall()
+
+    def capture_call(*args, **kwargs):
+        raise CapturedCallException(*args, **kwargs)
+
+    with patch.object(component, component_method, new=capture_call):
+        try:
+            yield captured_call
+        except CapturedCallException as e:
+            captured_call.args = e.args
+            captured_call.kwargs = e.kwargs
+
+
+def drain_module_parameters(module: torch.nn.Module):
+    state_dict_meta = {name: {'device': tensor.device, 'dtype': tensor.dtype} for name, tensor in module.state_dict().items()}
+    state_dict = {name: torch.nn.Parameter(torch.empty_like(tensor, device='cpu')) for name, tensor in module.state_dict().items()}
+    module.load_state_dict(state_dict, assign=True)
+    for name, param in state_dict.items():
+        meta = state_dict_meta[name]
+        param.data = torch.Tensor([]).to(**meta)
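capture_component_call above records the exact args/kwargs a pipeline passes to one of its components: it patches the component's forward so that the first call raises a private exception carrying the call, then swallows that exception and stores what it captured. A small sketch of the same mechanism against a dummy pipeline (DummyTransformer and DummyPipeline are made-up stand-ins; the helper itself is the one defined in this commit):

import torch
from optimization_utils import capture_component_call

class DummyTransformer(torch.nn.Module):
    def forward(self, hidden_states, timestep):
        return hidden_states * timestep

class DummyPipeline:
    def __init__(self):
        self.transformer = DummyTransformer()
    def __call__(self, x):
        # The pipeline decides internally how it calls its component.
        return self.transformer(hidden_states=x, timestep=torch.tensor(0.5))

pipe = DummyPipeline()

# The first transformer call aborts the pipeline run early (the exception is
# caught by the context manager), leaving the call structure in `call`.
with capture_component_call(pipe, 'transformer') as call:
    pipe(torch.randn(1, 4))

print(call.args)           # ()
print(call.kwargs.keys())  # dict_keys(['hidden_states', 'timestep'])

In optimize_pipeline_, this captured structure is what feeds torch.export.export as example args/kwargs, with TRANSFORMER_DYNAMIC_SHAPES layered on top.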
poli_tower.png ADDED

Git LFS Details

  • SHA256: 96bc0e056b5aee2d2f1ed7723bab4f9c928dfb519ec21380aff4bbb12d22b849
  • Pointer size: 132 Bytes
  • Size of remote file: 3.49 MB
requirements.txt CHANGED
@@ -1,23 +1,11 @@
-accelerate==1.7.0
-diffusers==0.33.1
-transformers==4.52.4
-sentencepiece==0.2.0
-pillow==11.2.1
-av==12.1.0
-numpy==1.26.2
-scipy==1.12.0
-requests==2.32.4
-torchsde==0.2.6
-torch>=2.0.0
-torchvision
-torchaudio
-einops==0.8.1
-opencv-contrib-python
+git+https://github.com/linoytsaban/diffusers.git@wan22-loras
+
+transformers
+accelerate
 safetensors
-huggingface_hub==0.34.3
-decord==0.6.0
-imageio_ffmpeg==0.6.0
-sageattention==1.0.6
-xformers==0.0.29.post3
-bitsandbytes==0.46.0
-pillow-heif==0.22.0
+sentencepiece
+peft
+ftfy
+imageio-ffmpeg
+opencv-python
+torchao==0.11.0
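The new requirements pin torchao==0.11.0, which provides the quantize_, Int8WeightOnlyConfig and Float8DynamicActivationFloat8WeightConfig APIs used in optimization.py. A minimal sketch of the weight-only call on a throwaway module (the layer sizes are arbitrary, and depending on the torchao build you may need a GPU or bfloat16 weights for the fast quantized kernels):

import torch
from torchao.quantization import quantize_, Int8WeightOnlyConfig

# Throwaway stand-in for the text encoder: quantize_ rewrites matching
# submodules (Linear layers by default) in place, keeping the module API.
model = torch.nn.Sequential(
    torch.nn.Linear(1024, 4096),
    torch.nn.GELU(),
    torch.nn.Linear(4096, 1024),
)

quantize_(model, Int8WeightOnlyConfig())

# The forward signature is unchanged; only the weight storage/compute differs.
print(model(torch.randn(2, 1024)).shape)  # torch.Size([2, 1024])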
 
squatting_sonic.png ADDED

Git LFS Details

  • SHA256: d5675e8192c6274c22b07cb60af92b8577d9fcf26f79a10450a325e385e17e18
  • Pointer size: 132 Bytes
  • Size of remote file: 1.05 MB
tower_takes_off.png ADDED

Git LFS Details

  • SHA256: 3f824eae87d73d1b841354fcb96cfe5f7d08f8f2d6410bfaed864ecaf1500499
  • Pointer size: 132 Bytes
  • Size of remote file: 1.43 MB
ugly_sonic.jpeg ADDED

Git LFS Details

  • SHA256: 37f76cf1cbb3a3fa0a6eb26898c8f89f71fa280d13f30fcc9dfdd3709cb9824d
  • Pointer size: 131 Bytes
  • Size of remote file: 290 kB