Spaces:

Ephemeral182
/

PosterCraft

Running

App Files Files Community

Ephemeral182 committed on Jun 13

Commit

1aca16b

verified ·

1 Parent(s): 4b3c0f3

Update app.py

Browse files

Files changed (1) hide show

app.py +179 -211

app.py CHANGED Viewed

@@ -45,14 +45,7 @@ logging.basicConfig(
 # 2. Model Download Function (CPU only)
 # ------------------------------------------------------------------
 def download_model_weights(target_dir, repo_id, subdir=None):
-    """
-    Download model weights to specified directory (CPU operation)
-    Args:
-        target_dir (str): Local target directory
-        repo_id (str): HuggingFace repository ID
-        subdir (str): Subdirectory path in the repository (optional)
-    """
     from huggingface_hub import snapshot_download
     import shutil
@@ -71,7 +64,6 @@ def download_model_weights(target_dir, repo_id, subdir=None):
             "local_dir_use_symlinks": False,
         }
-        # Add token if available
         if hf_token:
             download_kwargs["token"] = hf_token
@@ -125,26 +117,23 @@ def ensure_models_downloaded():
 ensure_models_downloaded()
 # ------------------------------------------------------------------
-# 4. Qwen Prompt Rewriting Agent
 # ------------------------------------------------------------------
-def create_qwen_agent(model_path):
-    """Create Qwen agent inside GPU context"""
-    load_kwargs = {
-        "torch_dtype": torch.bfloat16,
-        "device_map": "auto"
-    }
-    # Add token if available
-    if hf_token:
-        load_kwargs["token"] = hf_token
-    tokenizer = AutoTokenizer.from_pretrained(model_path, **load_kwargs)
-    model = AutoModelForCausalLM.from_pretrained(model_path, **load_kwargs)
-    return tokenizer, model
-def recap_prompt(tokenizer, model, text):
-    """Recap prompt using Qwen model"""
-    prompt_template = """You are an expert poster prompt designer. Your task is to rewrite a user's short poster prompt into a detailed and vivid long-format prompt. Follow these steps carefully:
 **Step 1: Analyze the Core Requirements**
 Identify the key elements in the user's prompt. Do not miss any details.
@@ -182,33 +171,120 @@ Elaborate on each core requirement to create a rich description.
 ---
 **User Prompt:**
 {brief_description}"""
-    try:
-        messages = [
-            {"role": "user", "content": prompt_template.format(brief_description=text)}
-        ]
-        chat = tokenizer.apply_chat_template(
-            messages, tokenize=False, add_generation_prompt=True, enable_thinking=False
-        )
-        inputs = tokenizer([chat], return_tensors="pt").to(model.device)
-        with torch.no_grad():
-            ids = model.generate(
-                **inputs, max_new_tokens=1024, temperature=0.6, do_sample=True
             )
-        out = tokenizer.decode(
-            ids[0][len(inputs.input_ids[0]):], skip_special_tokens=True
-        ).strip()
-        if "</think>" in out:
-            out = out.split("</think>")[-1].strip()
-        return out or text
-    except Exception as e:
-        logging.error(f"Prompt recap failed: {e}")
-        return text
 # ------------------------------------------------------------------
-# 5. Main Generation Function (GPU)
 # ------------------------------------------------------------------
 @spaces.GPU(duration=300)
 def generate_image_interface(
@@ -217,102 +293,47 @@ def generate_image_interface(
     progress=gr.Progress(track_tqdm=True),
 ):
     """Generate image using FLUX pipeline"""
     try:
-        # If no token available, return error message
         if not hf_token:
             return None, "❌ Error: HF_TOKEN not found. Please configure authentication.", ""
-        # Set device and dtype
         device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-        torch_dtype = torch.bfloat16 if device.type == "cuda" else torch.float32
-        # Initialize FLUX pipeline
-        progress(0.1, desc="Loading FLUX pipeline...")
-        pipeline = FluxPipeline.from_pretrained(
-            DEFAULT_PIPELINE_PATH,
-            torch_dtype=torch_dtype,
-            device_map="balanced" if device.type == "cuda" else None,
-            token=hf_token
-        )
-        # Load custom transformer weights if available
-        custom_weights_local = "local_weights/PosterCraft-v1_RL"
-        if os.path.exists(custom_weights_local):
-            progress(0.3, desc="Loading custom transformer weights...")
-            try:
-                custom_transformer = FluxTransformer2DModel.from_pretrained(
-                    custom_weights_local,
-                    torch_dtype=torch_dtype,
-                    device_map="balanced" if device.type == "cuda" else None,
-                    token=hf_token
-                )
-                pipeline.transformer = custom_transformer
-                logging.info("Custom transformer weights loaded successfully")
-            except Exception as e:
-                logging.warning(f"Failed to load custom transformer weights: {e}")
-        # Process prompt
-        final_prompt = original_prompt
-        if enable_recap:
-            progress(0.5, desc="Processing prompt with Qwen...")
-            qwen_local = "local_weights/Qwen3-8B"
-            if os.path.exists(qwen_local):
-                try:
-                    tokenizer, model = create_qwen_agent(qwen_local)
-                    final_prompt = recap_prompt(tokenizer, model, original_prompt)
-                    logging.info(f"Enhanced prompt: {final_prompt}")
-                    # Clean up Qwen model to free memory
-                    del tokenizer, model
-                    torch.cuda.empty_cache()
-                except Exception as e:
-                    logging.warning(f"Qwen processing failed: {e}")
-                    final_prompt = original_prompt
-            else:
-                # Fallback to online Qwen model
-                try:
-                    tokenizer, model = create_qwen_agent(DEFAULT_QWEN_MODEL_PATH)
-                    final_prompt = recap_prompt(tokenizer, model, original_prompt)
-                    del tokenizer, model
-                    torch.cuda.empty_cache()
-                except Exception as e:
-                    logging.warning(f"Online Qwen failed: {e}")
-                    final_prompt = original_prompt
-        # Generate seed
-        if seed_input == -1:
-            seed = random.randint(0, MAX_SEED)
-        else:
-            seed = int(seed_input)
-        generator = torch.Generator(device=device).manual_seed(seed)
-        # Generate image
-        progress(0.7, desc="Generating image...")
-        with torch.no_grad():
-            result = pipeline(
-                prompt=final_prompt,
-                height=height,
-                width=width,
-                num_inference_steps=num_inference_steps,
-                guidance_scale=guidance_scale,
-                generator=generator,
             )
-        image = result.images[0]
-        # Clean up
-        del pipeline
-        torch.cuda.empty_cache()
-        progress(1.0, desc="Complete!")
-        return image, f"✅ Generation complete! Seed: {seed}", final_prompt
     except Exception as e:
         logging.error(f"Generation failed: {e}")
         return None, f"❌ Generation failed: {str(e)}", ""
 # ------------------------------------------------------------------
-# 6. Gradio Interface
 # ------------------------------------------------------------------
 def create_interface():
     """Create Gradio interface"""
@@ -323,8 +344,6 @@ def create_interface():
         css="""
         .main-container { max-width: 1200px; margin: 0 auto; }
         .status-box { padding: 10px; border-radius: 5px; margin: 10px 0; }
-        .auth-success { background-color: #d4edda; border: 1px solid #c3e6cb; color: #155724; }
-        .auth-error { background-color: #f8d7da; border: 1px solid #f5c6cb; color: #721c24; }
         """
     ) as demo:
@@ -343,98 +362,47 @@ def create_interface():
         gr.HTML("""
         <div class="status-box">
-            <p><strong>⚠️ First use requires model download, please wait about 10-15 minutes</strong></p>
         </div>
         """)
         with gr.Row():
             with gr.Column(scale=1):
-                original_prompt = gr.Textbox(
-                    label="Poster Prompt",
                     placeholder="Enter your poster description...",
-                    lines=3,
                     value="A vintage travel poster for Paris, featuring the Eiffel Tower at sunset with warm golden lighting"
                 )
-                enable_recap = gr.Checkbox(
-                    label="Enable Prompt Enhancement (Qwen3-8B)",
-                    value=True,
                     info="Use AI to enhance and expand your prompt"
                 )
                 with gr.Row():
-                    height = gr.Slider(
-                        label="Height",
-                        minimum=256,
-                        maximum=MAX_IMAGE_SIZE,
-                        value=1024,
-                        step=32
-                    )
-                    width = gr.Slider(
-                        label="Width",
-                        minimum=256,
-                        maximum=MAX_IMAGE_SIZE,
-                        value=768,
-                        step=32
-                    )
-                with gr.Row():
-                    num_inference_steps = gr.Slider(
-                        label="Inference Steps",
-                        minimum=1,
-                        maximum=50,
-                        value=20,
-                        step=1
-                    )
-                    guidance_scale = gr.Slider(
-                        label="Guidance Scale",
-                        minimum=1.0,
-                        maximum=15.0,
-                        value=3.5,
-                        step=0.1
-                    )
-                seed_input = gr.Number(
-                    label="Seed (-1 for random)",
-                    value=-1,
-                    precision=0
-                )
-                generate_btn = gr.Button(
-                    "🎨 Generate Poster",
-                    variant="primary",
-                    size="lg"
-                )
             with gr.Column(scale=1):
-                output_image = gr.Image(
-                    label="Generated Poster",
-                    type="pil",
-                    height=600
-                )
-                status_output = gr.Textbox(
-                    label="Generation Status",
-                    interactive=False,
-                    lines=2
-                )
-                enhanced_prompt = gr.Textbox(
-                    label="Enhanced Prompt",
-                    interactive=False,
-                    lines=5,
-                    info="The final prompt used for generation"
-                )
-        # Event handlers
-        generate_btn.click(
-            fn=generate_image_interface,
-            inputs=[
-                original_prompt, enable_recap, height, width,
-                num_inference_steps, guidance_scale, seed_input
-            ],
-            outputs=[output_image, status_output, enhanced_prompt]
-        )
         # Examples
         gr.Examples(
@@ -444,13 +412,13 @@ def create_interface():
                 ["A minimalist concert poster with bold typography"],
                 ["A vintage advertisement for organic coffee"],
             ],
-            inputs=[original_prompt]
         )
     return demo
 # ------------------------------------------------------------------
-# 7. Launch Application
 # ------------------------------------------------------------------
 if __name__ == "__main__":
     demo = create_interface()
@@ -458,4 +426,4 @@ if __name__ == "__main__":
         server_name="0.0.0.0",
         server_port=7860,
         show_api=False
-    )

 # 2. Model Download Function (CPU only)
 # ------------------------------------------------------------------
 def download_model_weights(target_dir, repo_id, subdir=None):
+    """Download model weights to specified directory (CPU operation)"""
     from huggingface_hub import snapshot_download
     import shutil
             "local_dir_use_symlinks": False,
         }
         if hf_token:
             download_kwargs["token"] = hf_token
 ensure_models_downloaded()
 # ------------------------------------------------------------------
+# 4. Qwen Recap Agent (based on your original logic)
 # ------------------------------------------------------------------
+class QwenRecapAgent:
+    def __init__(self, model_path, max_retries=3, retry_delay=2, device_map="auto"):
+        self.max_retries = max_retries
+        self.retry_delay = retry_delay
+        self.device = device_map
+        self.tokenizer = AutoTokenizer.from_pretrained(model_path, token=hf_token)
+        model_kwargs = {"torch_dtype": torch.bfloat16, "device_map": device_map if device_map == "auto" else None}
+        if hf_token:
+            model_kwargs["token"] = hf_token
+        self.model = AutoModelForCausalLM.from_pretrained(model_path, **model_kwargs)
+        if device_map != "auto":
+             self.model.to(device_map)
+        self.prompt_template = """You are an expert poster prompt designer. Your task is to rewrite a user's short poster prompt into a detailed and vivid long-format prompt. Follow these steps carefully:
 **Step 1: Analyze the Core Requirements**
 Identify the key elements in the user's prompt. Do not miss any details.
 ---
 **User Prompt:**
 {brief_description}"""
+    def recap_prompt(self, original_prompt):
+        full_prompt = self.prompt_template.format(brief_description=original_prompt)
+        messages = [{"role": "user", "content": full_prompt}]
+        try:
+            text = self.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True, enable_thinking=False)
+            model_inputs = self.tokenizer([text], return_tensors="pt").to(self.model.device)
+            with torch.no_grad():
+                generated_ids = self.model.generate(**model_inputs, max_new_tokens=1024, temperature=0.6)
+            output_ids = generated_ids[0][len(model_inputs.input_ids[0]):].tolist()
+            full_response = self.tokenizer.decode(output_ids, skip_special_tokens=True)
+            final_answer = self._extract_final_answer(full_response)
+            if final_answer:
+                return final_answer.strip()
+            logging.info("Qwen returned an empty answer. Using original prompt.")
+            return original_prompt
+        except Exception as e:
+            logging.error(f"Qwen recap failed: {e}. Using original prompt.")
+            return original_prompt
+    def _extract_final_answer(self, full_response):
+        if "</think>" in full_response:
+            return full_response.split("</think>")[-1].strip()
+        if "<think>" not in full_response:
+            return full_response.strip()
+        return None
+# ------------------------------------------------------------------
+# 5. Poster Generator Class (based on your original logic, with caching added)
+# ------------------------------------------------------------------
+class PosterGenerator:
+    def __init__(self, pipeline_path, qwen_model_path, custom_weights_path, device):
+        self.device = device
+        self.pipeline_path = pipeline_path
+        self.qwen_model_path = qwen_model_path
+        self.custom_weights_path = custom_weights_path
+        # Cache variables
+        self.qwen_agent = None
+        self.pipeline = None
+    def _load_qwen_agent(self):
+        if self.qwen_agent is None:
+            if not self.qwen_model_path:
+                return None
+            # Check the local path
+            qwen_local = "local_weights/Qwen3-8B"
+            model_path = qwen_local if os.path.exists(qwen_local) else self.qwen_model_path
+            logging.info(f"Loading Qwen agent from {model_path}")
+            self.qwen_agent = QwenRecapAgent(model_path=model_path, device_map=str(self.device))
+        return self.qwen_agent
+    def _load_flux_pipeline(self):
+        if self.pipeline is None:
+            logging.info("Loading FLUX pipeline...")
+            self.pipeline = FluxPipeline.from_pretrained(
+                self.pipeline_path,
+                torch_dtype=torch.bfloat16,
+                token=hf_token
             )
+            # Load custom weights
+            custom_weights_local = "local_weights/PosterCraft-v1_RL"
+            if os.path.exists(custom_weights_local):
+                logging.info(f"Loading custom Transformer from directory: {custom_weights_local}")
+                transformer = FluxTransformer2DModel.from_pretrained(
+                    custom_weights_local,
+                    torch_dtype=torch.bfloat16,
+                    token=hf_token
+                )
+                self.pipeline.transformer = transformer
+            elif self.custom_weights_path and os.path.exists(self.custom_weights_path):
+                logging.info(f"Loading custom Transformer from directory: {self.custom_weights_path}")
+                transformer = FluxTransformer2DModel.from_pretrained(
+                    self.custom_weights_path,
+                    torch_dtype=torch.bfloat16,
+                    token=hf_token
+                )
+                self.pipeline.transformer = transformer
+            self.pipeline.to(self.device)
+        return self.pipeline
+    def generate(self, prompt, enable_recap, **kwargs):
+        final_prompt = prompt
+        if enable_recap:
+            qwen_agent = self._load_qwen_agent()
+            if not qwen_agent:
+                raise gr.Error("Recap is enabled, but the recap model is not available. Check model path.")
+            final_prompt = qwen_agent.recap_prompt(prompt)
+        pipeline = self._load_flux_pipeline()
+        generator = torch.Generator(device=self.device).manual_seed(kwargs['seed'])
+        with torch.inference_mode():
+            image = pipeline(
+                prompt=final_prompt,
+                generator=generator,
+                num_inference_steps=kwargs['num_inference_steps'],
+                guidance_scale=kwargs['guidance_scale'],
+                width=kwargs['width'],
+                height=kwargs['height']
+            ).images[0]
+        return image, final_prompt
 # ------------------------------------------------------------------
+# 6. Main Generation Function (GPU) - keeps your original logic
 # ------------------------------------------------------------------
 @spaces.GPU(duration=300)
 def generate_image_interface(
     progress=gr.Progress(track_tqdm=True),
 ):
     """Generate image using FLUX pipeline"""
+    if not original_prompt or not original_prompt.strip():
+        return None, "❌ Prompt cannot be empty!", ""
     try:
         if not hf_token:
             return None, "❌ Error: HF_TOKEN not found. Please configure authentication.", ""
         device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        # Global generator instance
+        if not hasattr(generate_image_interface, 'generator'):
+            generate_image_interface.generator = PosterGenerator(
+                pipeline_path=DEFAULT_PIPELINE_PATH,
+                qwen_model_path=DEFAULT_QWEN_MODEL_PATH,
+                custom_weights_path=DEFAULT_CUSTOM_WEIGHTS_PATH,
+                device=device
             )
+        actual_seed = int(seed_input) if seed_input and seed_input != -1 else random.randint(1, 2**32 - 1)
+        progress(0.1, desc="Starting generation...")
+        image, final_prompt = generate_image_interface.generator.generate(
+            prompt=original_prompt,
+            enable_recap=enable_recap,
+            height=int(height),
+            width=int(width),
+            num_inference_steps=int(num_inference_steps),
+            guidance_scale=float(guidance_scale),
+            seed=actual_seed
+        )
+        status_log = f"✅ Generation complete! Seed: {actual_seed}"
+        return image, status_log, final_prompt
     except Exception as e:
         logging.error(f"Generation failed: {e}")
         return None, f"❌ Generation failed: {str(e)}", ""
 # ------------------------------------------------------------------
+# 7. Gradio Interface (keeps your original style)
 # ------------------------------------------------------------------
 def create_interface():
     """Create Gradio interface"""
         css="""
         .main-container { max-width: 1200px; margin: 0 auto; }
         .status-box { padding: 10px; border-radius: 5px; margin: 10px 0; }
         """
     ) as demo:
         gr.HTML("""
         <div class="status-box">
+            <p><strong>⚠️ First generation requires model loading (5-10 minutes). Subsequent generations are much faster!</strong></p>
         </div>
         """)
         with gr.Row():
             with gr.Column(scale=1):
+                gr.Markdown("### 1. Configuration")
+                prompt_input = gr.Textbox(
+                    label="Poster Prompt",
+                    lines=3,
                     placeholder="Enter your poster description...",
                     value="A vintage travel poster for Paris, featuring the Eiffel Tower at sunset with warm golden lighting"
                 )
+                enable_recap_checkbox = gr.Checkbox(
+                    label="Enable Prompt Enhancement (Qwen3-8B)",
+                    value=True,
                     info="Use AI to enhance and expand your prompt"
                 )
                 with gr.Row():
+                    width_input = gr.Slider(label="Width", minimum=256, maximum=MAX_IMAGE_SIZE, value=768, step=32)
+                    height_input = gr.Slider(label="Height", minimum=256, maximum=MAX_IMAGE_SIZE, value=1024, step=32)
+                num_inference_steps_input = gr.Slider(label="Inference Steps", minimum=1, maximum=100, value=20, step=1)
+                guidance_scale_input = gr.Slider(label="Guidance Scale", minimum=0.0, maximum=20.0, value=3.5, step=0.1)
+                seed_number_input = gr.Number(label="Seed (-1 for random)", value=-1, minimum=-1, step=1)
+                generate_button = gr.Button("🎨 Generate Poster", variant="primary", size="lg")
             with gr.Column(scale=1):
+                gr.Markdown("### 2. Results")
+                image_output = gr.Image(label="Generated Poster", type="pil", height=600)
+                status_output = gr.Textbox(label="Generation Status", lines=2, interactive=False)
+                recapped_prompt_output = gr.Textbox(label="Enhanced Prompt", lines=5, interactive=False, info="The final prompt used for generation")
+        inputs_list = [
+            prompt_input, enable_recap_checkbox, height_input, width_input,
+            num_inference_steps_input, guidance_scale_input, seed_number_input
+        ]
+        outputs_list = [image_output, status_output, recapped_prompt_output]
+        generate_button.click(fn=generate_image_interface, inputs=inputs_list, outputs=outputs_list)
         # Examples
         gr.Examples(
                 ["A minimalist concert poster with bold typography"],
                 ["A vintage advertisement for organic coffee"],
             ],
+            inputs=[prompt_input]
         )
     return demo
 # ------------------------------------------------------------------
+# 8. Launch Application
 # ------------------------------------------------------------------
 if __name__ == "__main__":
     demo = create_interface()
         server_name="0.0.0.0",
         server_port=7860,
         show_api=False
+    )