NandanData committed on
Commit
9b33133
Β·
verified Β·
1 Parent(s): 7a011e1

Update backend/chat_router.py

Browse files
Files changed (1) hide show
  1. backend/chat_router.py +48 -53
backend/chat_router.py CHANGED
@@ -1,63 +1,58 @@
1
- import os, json, time, requests
2
- from utils.constants import ROUTER_CHAT_URL, ROUTER_MODEL, REQUEST_TIMEOUT_SECONDS_DEFAULT, RETRIES_DEFAULT, BACKOFF_SECONDS_DEFAULT
 
 
 
 
 
 
 
 
3
  from utils.persona import AI_GYNO_PERSONA_V3
4
 
5
# Hard-coded model id; NOTE(review): this shadows the ROUTER_MODEL imported
# from utils.constants — confirm the override is intentional.
ROUTER_MODEL = "meta-llama/Meta-Llama-3-8B-Instruct"

# Direct chat-completions URL for the pinned model.
# NOTE(review): appears unused — chat() builds its URL via _endpoint() instead.
BASE_URL = f"https://router.huggingface.co/hf-inference/chat-completions/{ROUTER_MODEL}"
 
8
 
9
- def _headers():
10
- tok = os.getenv("HF_API_TOKEN") or os.getenv("HF_TOKEN")
11
- return {"Authorization": f"Bearer {tok}", "Content-Type": "application/json"} if tok else {}
 
 
12
 
13
def _endpoint():
    """Build the chat-completions URL for ROUTER_MODEL.

    The base URL can be overridden with the HF_CHAT_BASE environment
    variable; otherwise ROUTER_CHAT_URL (from utils.constants) is used.
    """
    override = os.getenv("HF_CHAT_BASE")
    base = override if override else ROUTER_CHAT_URL
    return f"{base}/{ROUTER_MODEL}"
17
 
18
def chat(user_message: str, mode: str = "patient") -> str:
    """Send one chat turn to the HF router and return the reply text.

    Parameters:
        user_message: the user's message.
        mode: "patient" selects simple reassuring phrasing; any other value
            selects concise clinical phrasing.

    Returns:
        The model's reply text, or a user-facing error string. This function
        never raises; failures are reported as strings (UI boundary).
    """
    headers = _headers()
    if not headers:
        return "⚠ Set HF_API_TOKEN (or HF_TOKEN) in Secrets."

    style = "Use simple, reassuring language." if mode == "patient" else "Use concise clinical phrasing with differentials and next steps."
    system_prompt = AI_GYNO_PERSONA_V3 + f"\nMode: {mode}. {style}"

    payload = {
        "model": ROUTER_MODEL,
        "messages": [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_message},
        ],
        "max_tokens": 400,
        "temperature": 0.2,
    }

    url = _endpoint()

    for attempt in range(1, RETRIES_DEFAULT + 1):
        try:
            r = requests.post(url, headers=headers, json=payload, timeout=REQUEST_TIMEOUT_SECONDS_DEFAULT)
            txt = r.text
            # Prefer JSON; fall back to raw text for non-JSON error pages.
            try:
                data = r.json()
            except ValueError:  # r.json() raises ValueError on invalid JSON
                if "not found" in txt.lower():
                    return "❌ Router 404: model not available for chat-completions route. Try Meta-Llama-3-8B-Instruct."
                return txt[:2000]

            # OpenAI-compatible chat-completions shape.
            if isinstance(data, dict) and "choices" in data and data["choices"]:
                msg = data["choices"][0].get("message", {}).get("content", "")
                if msg:
                    return msg
            # Text-generation style fallbacks.
            if isinstance(data, list) and data and "generated_text" in data[0]:
                return data[0]["generated_text"]
            if isinstance(data, dict) and "generated_text" in data:
                return data["generated_text"]

            return "⚠ Unexpected response: " + json.dumps(data)[:1200]
        except Exception:
            # Broad catch is deliberate at this UI boundary: any transport or
            # parsing failure falls through to a retry. Only back off between
            # attempts — the original slept after the final attempt too, which
            # just delayed the failure message for no benefit.
            if attempt < RETRIES_DEFAULT:
                time.sleep(BACKOFF_SECONDS_DEFAULT * attempt)
    return "❌ Endpoint unreachable after retries."
 
1
+ # -------------------------------
2
+ # πŸ‘‡ HuggingFace OpenAI-Compatible Client
3
+ # -------------------------------
4
+ from openai import OpenAI
5
+ import os
6
+ from utils.constants import (
7
+ ROUTER_CHAT_URL,
8
+ ROUTER_MODEL,
9
+ REQUEST_TIMEOUT_SECONDS_DEFAULT,
10
+ )
11
  from utils.persona import AI_GYNO_PERSONA_V3
12
 
13
# Pin the model id; NOTE(review): this overrides the ROUTER_MODEL imported
# from utils.constants — confirm the override is intentional.
ROUTER_MODEL = "meta-llama/Llama-3.1-8B-Instruct"

# HF token, read once at import time; chat() refuses to run when it is unset.
# NOTE(review): changing the env var after import has no effect — verify that
# is acceptable for this deployment.
token = os.getenv("HF_API_TOKEN") or os.getenv("HF_TOKEN")

# OpenAI-compatible client pointed at the HuggingFace router.
client = OpenAI(
    base_url="https://router.huggingface.co/v1",
    api_key=token,
)
24
 
 
 
 
 
25
 
26
def chat(user_message: str, mode: str = "patient") -> str:
    """Send one chat turn to the HF router via the OpenAI-compatible SDK.

    Parameters:
        user_message: the user's message.
        mode: "patient" selects simple reassuring phrasing; any other value
            selects concise clinical phrasing.

    Returns:
        The model's reply text, or a user-facing error string. This function
        never raises; failures are reported as strings (UI boundary).
    """
    if not token:
        return "⚠ Set HF_API_TOKEN or HF_TOKEN in your environment."

    # Patient-friendly language vs clinical doctor mode.
    style = (
        "Use simple, reassuring language."
        if mode == "patient"
        else "Use concise clinical phrasing with differentials and next steps."
    )
    system_prompt = AI_GYNO_PERSONA_V3 + f"\nMode: {mode}. {style}"

    try:
        completion = client.chat.completions.create(
            model=ROUTER_MODEL,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_message},
            ],
            max_tokens=400,
            temperature=0.2,
            timeout=REQUEST_TIMEOUT_SECONDS_DEFAULT,
        )
    except Exception as e:
        # Boundary catch: surface SDK/network failures as text for the UI.
        return f"❌ Error: {str(e)}"

    # Guard the response shape explicitly. Previously an empty choices list or
    # a null content field raised inside the except above and was misreported
    # as "❌ Error: 'NoneType' object has no attribute 'strip'".
    if not completion.choices:
        return "⚠ Unexpected response: no choices returned."
    content = completion.choices[0].message.content
    return content.strip() if content else ""