akhaliq (HF Staff) committed
Commit 7523755 · Parent(s): 0fb9638

Fix streaming and use proper system prompts


Issues fixed:
1. Streaming did not work properly: code only appeared at the end
2. Language-specific prompts from prompts.py were not being used

Backend changes (backend_api.py):
- Import system prompts from anycoder_app/prompts.py
- Initialize Gradio and ComfyUI prompts on startup via docs_manager
- Use language-specific prompts (HTML, Gradio, Streamlit, React, etc.)
- Add proper system prompt selection based on language parameter
- Improve streaming with a smaller sleep interval (0.01s) so chunks flush immediately (see the SSE sketch after this list)
- Add chunk counting and logging for debugging
- Add detailed logging for prompt selection
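For context, the streaming fix relies on the standard SSE pattern sketched below. This is a minimal sketch assuming backend_api.py serves the generator through FastAPI's StreamingResponse; the /api/generate route name and the stand-in chunks are hypothetical, but the event fields ("type", "content", "timestamp") match the diff:

# Minimal SSE sketch, assuming a FastAPI backend; the route name and
# the stand-in chunks are hypothetical, the event shape matches the diff.
import asyncio
import json
from datetime import datetime

from fastapi import FastAPI
from fastapi.responses import StreamingResponse

app = FastAPI()

@app.get("/api/generate")  # hypothetical route name
async def generate():
    async def event_stream():
        for content in ["def hello():\n", "    print('hi')\n"]:  # stand-in for model deltas
            event_data = json.dumps({
                "type": "chunk",
                "content": content,
                "timestamp": datetime.now().isoformat(),
            })
            yield f"data: {event_data}\n\n"
            # The short sleep yields to the event loop so the ASGI server
            # can write this event out before the next one is produced.
            await asyncio.sleep(0.01)
        yield f"data: {json.dumps({'type': 'complete', 'code': ''})}\n\n"
    return StreamingResponse(event_stream(), media_type="text/event-stream")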

Frontend changes (page.tsx, api.ts):
- Add console logging to track chunk reception
- Log SSE events for debugging
- Log buffer sizes and chunk lengths
- Track generation completion with total code length

This ensures:
✅ Proper language-specific prompts are used (HTML, Gradio, Streamlit, etc.)
✅ Streaming works immediately, with chunks appearing in real time (see the client sketch below)
✅ Better debugging with console logs
✅ Backend properly flushes chunks to frontend
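A quick way to confirm the streaming behavior end to end is to watch chunk arrival times with a small SSE client. A sketch assuming httpx and the hypothetical /api/generate route from the sketch above:

# Hypothetical smoke test: with working streaming, chunks should arrive
# spread over time rather than all at once when generation finishes.
import json
import time

import httpx

with httpx.stream("GET", "http://localhost:8000/api/generate", timeout=None) as resp:
    start = time.monotonic()
    for line in resp.iter_lines():
        if not line.startswith("data: "):
            continue  # skip the blank separator lines between events
        event = json.loads(line[len("data: "):])
        if event["type"] == "chunk":
            print(f"{time.monotonic() - start:6.2f}s  +{len(event['content'])} chars")
        elif event["type"] == "complete":
            print("done")
            break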

backend_api.py CHANGED
@@ -19,6 +19,30 @@ import os
 from huggingface_hub import InferenceClient
 import httpx
 
+# Import system prompts for code generation
+from anycoder_app.prompts import (
+    HTML_SYSTEM_PROMPT,
+    TRANSFORMERS_JS_SYSTEM_PROMPT,
+    STREAMLIT_SYSTEM_PROMPT,
+    REACT_SYSTEM_PROMPT,
+    GRADIO_SYSTEM_PROMPT,
+    JSON_SYSTEM_PROMPT,
+    GENERIC_SYSTEM_PROMPT
+)
+
+# Initialize Gradio and ComfyUI prompts on startup
+try:
+    from anycoder_app.docs_manager import update_gradio_system_prompts, update_json_system_prompts
+    print("[Startup] Initializing Gradio and ComfyUI system prompts...")
+    update_gradio_system_prompts()
+    update_json_system_prompts()
+    # Re-import to get updated prompts
+    from anycoder_app.prompts import GRADIO_SYSTEM_PROMPT, JSON_SYSTEM_PROMPT
+    print("[Startup] System prompts initialized successfully")
+except Exception as e:
+    print(f"[Startup] Warning: Could not initialize dynamic prompts: {e}")
+    print("[Startup] Will use fallback prompts")
+
 # Define models and languages here to avoid importing Gradio UI
 AVAILABLE_MODELS = [
     {"name": "Sherlock Dash Alpha", "id": "openrouter/sherlock-dash-alpha", "description": "Sherlock Dash Alpha model via OpenRouter"},
@@ -310,8 +334,18 @@ async def generate_code(
     # Track generated code
     generated_code = ""
 
-    # Use a simple system prompt
-    system_prompt = "You are a helpful AI assistant that generates code based on user requirements. Generate clean, well-commented code."
+    # Select appropriate system prompt based on language
+    prompt_map = {
+        "html": HTML_SYSTEM_PROMPT,
+        "gradio": GRADIO_SYSTEM_PROMPT,
+        "streamlit": STREAMLIT_SYSTEM_PROMPT,
+        "transformers.js": TRANSFORMERS_JS_SYSTEM_PROMPT,
+        "react": REACT_SYSTEM_PROMPT,
+        "comfyui": JSON_SYSTEM_PROMPT,
+    }
+    system_prompt = prompt_map.get(language, GENERIC_SYSTEM_PROMPT.format(language=language))
+
+    print(f"[Generate] Using {language} prompt for query: {query[:100]}...")
 
     # Get the real model ID
     actual_model_id = selected_model["id"]
@@ -372,6 +406,7 @@
         stream=True
     )
 
+    chunk_count = 0
     for chunk in stream:
         # Check if choices array has elements before accessing
         if (hasattr(chunk, 'choices') and
@@ -382,6 +417,7 @@
             chunk.choices[0].delta.content):
             content = chunk.choices[0].delta.content
             generated_code += content
+            chunk_count += 1
 
             # Send chunk as Server-Sent Event
             event_data = json.dumps({
@@ -390,7 +426,11 @@
                 "timestamp": datetime.now().isoformat()
             })
             yield f"data: {event_data}\n\n"
-            await asyncio.sleep(0)  # Allow other tasks to run
+
+            # Ensure immediate flush to client
+            await asyncio.sleep(0.01)  # Small delay to ensure flushing
+
+    print(f"[Generate] Completed with {chunk_count} chunks, total length: {len(generated_code)}")
 
     # Send completion event
     completion_data = json.dumps({
frontend/src/app/page.tsx CHANGED
@@ -79,6 +79,8 @@ export default function Home() {
       // onChunk - Update code editor in real-time, NOT the chat
       (chunk: string) => {
         generatedCodeBuffer += chunk;
+        console.log('[Stream] Received chunk:', chunk.substring(0, 50), '... (length:', chunk.length, ')');
+        console.log('[Stream] Buffer size:', generatedCodeBuffer.length);
         setGeneratedCode(generatedCodeBuffer);
       },
       // onComplete
frontend/src/lib/api.ts CHANGED
@@ -119,13 +119,16 @@ class ApiClient {
     eventSource.onmessage = (event) => {
       try {
         const data = JSON.parse(event.data);
+        console.log('[SSE] Received event:', data.type, data.content?.substring(0, 30));
 
         if (data.type === 'chunk' && data.content) {
           onChunk(data.content);
         } else if (data.type === 'complete' && data.code) {
+          console.log('[SSE] Generation complete, total code length:', data.code.length);
           onComplete(data.code);
           eventSource.close();
         } else if (data.type === 'error') {
+          console.error('[SSE] Error:', data.message);
           onError(data.message || 'Unknown error occurred');
           eventSource.close();
         }
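For reference, the browser's EventSource handles the data: framing that api.ts consumes; below is a rough Python mirror of the same dispatch contract, with the chunk/complete/error event shapes taken from the diff above:

# Rough Python mirror of the api.ts onmessage dispatch, for reference;
# event shapes (chunk/complete/error) are taken from the diff above.
import json

def handle_sse_data(payload: str, on_chunk, on_complete, on_error) -> bool:
    """Dispatch one SSE data payload; return True once the stream is finished."""
    data = json.loads(payload)
    if data.get("type") == "chunk" and data.get("content"):
        on_chunk(data["content"])
    elif data.get("type") == "complete" and data.get("code"):
        on_complete(data["code"])
        return True
    elif data.get("type") == "error":
        on_error(data.get("message") or "Unknown error occurred")
        return True
    return False

# Example: handle_sse_data('{"type": "chunk", "content": "print(1)"}',
#                          print, print, print) returns False and prints the chunk.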