Fix streaming and use proper system prompts
Issues fixed:
1. Streaming did not work properly - code only appeared once generation finished
2. Language-specific prompts from prompts.py were not being used
Backend changes (backend_api.py):
- Import system prompts from anycoder_app/prompts.py
- Initialize Gradio and ComfyUI prompts on startup via docs_manager
- Use language-specific prompts (HTML, Gradio, Streamlit, React, etc.)
- Add proper system prompt selection based on language parameter
- Add a short sleep (0.01s) after each chunk so events flush to the client immediately (see the streaming sketch after this list)
- Add chunk counting and logging for debugging
- Add detailed logging for prompt selection
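For reference, the streaming pattern behind these backend changes is roughly the following. This is a condensed sketch, not the exact code in backend_api.py - the generator name and surrounding setup are assumed, while the event shapes ("chunk", "complete") match what the frontend parses:

import asyncio
import json
from datetime import datetime

async def stream_sse_events(stream):
    # Turn model chunks into Server-Sent Events as they arrive
    generated_code = ""
    chunk_count = 0
    for chunk in stream:
        if not (chunk.choices and chunk.choices[0].delta.content):
            continue
        content = chunk.choices[0].delta.content
        generated_code += content
        chunk_count += 1
        yield "data: " + json.dumps({
            "type": "chunk",
            "content": content,
            "timestamp": datetime.now().isoformat(),
        }) + "\n\n"
        # Yield control to the event loop so each event is flushed immediately
        await asyncio.sleep(0.01)
    print(f"[Generate] Completed with {chunk_count} chunks, total length: {len(generated_code)}")
    # Final event carries the complete generated code
    yield "data: " + json.dumps({"type": "complete", "code": generated_code}) + "\n\n"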
Frontend changes (page.tsx, api.ts):
- Add console logging to track chunk reception
- Log SSE events for debugging
- Log buffer sizes and chunk lengths
- Track generation completion with total code length
This ensures:
✅ Proper language-specific prompts are used (HTML, Gradio, Streamlit, etc.)
✅ Streaming works immediately, with chunks appearing in real time (verifiable with the client sketch after this list)
✅ Better debugging with console logs
✅ Backend properly flushes chunks to frontend
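A quick way to confirm the streaming behavior outside the browser is to watch the raw SSE stream directly. A minimal sketch using httpx - the endpoint URL and query parameters here are assumptions, substitute the actual route exposed by backend_api.py:

import httpx

# Hypothetical route and params, for illustration only
with httpx.stream("GET", "http://localhost:8000/api/generate",
                  params={"query": "a todo app", "language": "html"},
                  timeout=None) as response:
    for line in response.iter_lines():
        if line.startswith("data: "):
            # Each event should arrive as it is generated, not all at once
            print(line[:80])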
Files changed:
- backend_api.py +43 -3
- frontend/src/app/page.tsx +2 -0
- frontend/src/lib/api.ts +3 -0
backend_api.py

@@ -19,6 +19,30 @@ import os
 from huggingface_hub import InferenceClient
 import httpx
 
+# Import system prompts for code generation
+from anycoder_app.prompts import (
+    HTML_SYSTEM_PROMPT,
+    TRANSFORMERS_JS_SYSTEM_PROMPT,
+    STREAMLIT_SYSTEM_PROMPT,
+    REACT_SYSTEM_PROMPT,
+    GRADIO_SYSTEM_PROMPT,
+    JSON_SYSTEM_PROMPT,
+    GENERIC_SYSTEM_PROMPT
+)
+
+# Initialize Gradio and ComfyUI prompts on startup
+try:
+    from anycoder_app.docs_manager import update_gradio_system_prompts, update_json_system_prompts
+    print("[Startup] Initializing Gradio and ComfyUI system prompts...")
+    update_gradio_system_prompts()
+    update_json_system_prompts()
+    # Re-import to get updated prompts
+    from anycoder_app.prompts import GRADIO_SYSTEM_PROMPT, JSON_SYSTEM_PROMPT
+    print("[Startup] System prompts initialized successfully")
+except Exception as e:
+    print(f"[Startup] Warning: Could not initialize dynamic prompts: {e}")
+    print("[Startup] Will use fallback prompts")
+
 # Define models and languages here to avoid importing Gradio UI
 AVAILABLE_MODELS = [
     {"name": "Sherlock Dash Alpha", "id": "openrouter/sherlock-dash-alpha", "description": "Sherlock Dash Alpha model via OpenRouter"},
@@ -310,8 +334,18 @@ async def generate_code(
     # Track generated code
     generated_code = ""
 
-    #
-
+    # Select appropriate system prompt based on language
+    prompt_map = {
+        "html": HTML_SYSTEM_PROMPT,
+        "gradio": GRADIO_SYSTEM_PROMPT,
+        "streamlit": STREAMLIT_SYSTEM_PROMPT,
+        "transformers.js": TRANSFORMERS_JS_SYSTEM_PROMPT,
+        "react": REACT_SYSTEM_PROMPT,
+        "comfyui": JSON_SYSTEM_PROMPT,
+    }
+    system_prompt = prompt_map.get(language, GENERIC_SYSTEM_PROMPT.format(language=language))
+
+    print(f"[Generate] Using {language} prompt for query: {query[:100]}...")
 
     # Get the real model ID
     actual_model_id = selected_model["id"]
@@ -372,6 +406,7 @@ async def generate_code(
                 stream=True
             )
 
+            chunk_count = 0
             for chunk in stream:
                 # Check if choices array has elements before accessing
                 if (hasattr(chunk, 'choices') and
@@ -382,6 +417,7 @@ async def generate_code(
                     chunk.choices[0].delta.content):
                     content = chunk.choices[0].delta.content
                     generated_code += content
+                    chunk_count += 1
 
                     # Send chunk as Server-Sent Event
                     event_data = json.dumps({
@@ -390,7 +426,11 @@ async def generate_code(
                         "timestamp": datetime.now().isoformat()
                     })
                     yield f"data: {event_data}\n\n"
-
+
+                    # Ensure immediate flush to client
+                    await asyncio.sleep(0.01)  # Small delay to ensure flushing
+
+            print(f"[Generate] Completed with {chunk_count} chunks, total length: {len(generated_code)}")
 
             # Send completion event
             completion_data = json.dumps({

frontend/src/app/page.tsx

@@ -79,6 +79,8 @@ export default function Home() {
         // onChunk - Update code editor in real-time, NOT the chat
         (chunk: string) => {
           generatedCodeBuffer += chunk;
+          console.log('[Stream] Received chunk:', chunk.substring(0, 50), '... (length:', chunk.length, ')');
+          console.log('[Stream] Buffer size:', generatedCodeBuffer.length);
           setGeneratedCode(generatedCodeBuffer);
         },
         // onComplete

frontend/src/lib/api.ts

@@ -119,13 +119,16 @@ class ApiClient {
     eventSource.onmessage = (event) => {
       try {
         const data = JSON.parse(event.data);
+        console.log('[SSE] Received event:', data.type, data.content?.substring(0, 30));
 
         if (data.type === 'chunk' && data.content) {
           onChunk(data.content);
         } else if (data.type === 'complete' && data.code) {
+          console.log('[SSE] Generation complete, total code length:', data.code.length);
           onComplete(data.code);
           eventSource.close();
         } else if (data.type === 'error') {
+          console.error('[SSE] Error:', data.message);
           onError(data.message || 'Unknown error occurred');
           eventSource.close();
         }