"""Minimal Hugging Face text-generation chat client with retry/backoff."""

import json
import os
import time

import requests

from utils.constants import (
    BACKOFF_SECONDS_DEFAULT,
    CHAT_ENDPOINT_DEFAULT,
    REQUEST_TIMEOUT_SECONDS_DEFAULT,
    RETRIES_DEFAULT,
)
from utils.persona import AI_GYNO_PERSONA_V2


def active_chat_endpoint() -> str:
    """Return the chat endpoint URL, preferring env overrides over the default.

    Precedence: HF_CHAT_ENDPOINT, then CHAT_ENDPOINT, then
    CHAT_ENDPOINT_DEFAULT from utils.constants.
    """
    return (
        os.getenv("HF_CHAT_ENDPOINT")
        or os.getenv("CHAT_ENDPOINT")
        or CHAT_ENDPOINT_DEFAULT
    )


def _headers() -> dict:
    """Build request headers from HF_API_TOKEN.

    Returns an empty dict when the token is unset — callers use that as
    the "not configured" signal.
    """
    tok = os.getenv("HF_API_TOKEN")
    if not tok:
        return {}
    return {"Authorization": f"Bearer {tok}", "Content-Type": "application/json"}


def _extract_generated_text(data):
    """Pull `generated_text` out of the known HF response shapes.

    Handles three shapes seen from the router:
      * list of dicts:        [{"generated_text": ...}]
      * flat dict:            {"generated_text": ...}
      * wrapped dict:         {"outputs": [{"generated_text": ...}]}
    Returns the text, or None when no shape matches.
    """
    if isinstance(data, list) and data and isinstance(data[0], dict):
        return data[0].get("generated_text")
    if isinstance(data, dict):
        if "generated_text" in data:
            return data["generated_text"]
        outputs = data.get("outputs")
        if isinstance(outputs, list) and outputs and isinstance(outputs[0], dict):
            return outputs[0].get("generated_text")
    return None


def chat(user_message: str, mode: str = "patient") -> str:
    """Send *user_message* to the HF text-generation router and return the reply.

    mode == "patient" appends simple/supportive-language guidance to the
    persona; any other mode asks for clinician-style output (differentials,
    initial workup, red flags).

    Always returns a user-facing string: the model reply on success,
    otherwise a warning/error message. Never raises to the caller.
    """
    url = active_chat_endpoint()
    headers = _headers()
    if not headers:
        return "⚠ Add HF_API_TOKEN in Settings → Secrets."

    system = AI_GYNO_PERSONA_V2 + (
        "\nUse simple, supportive language."
        if mode == "patient"
        else "\nProvide differentials, initial workup, and red flags."
    )
    # NOTE(review): the prompt's internal newlines were lost when this file was
    # flattened; reconstructed here as blank-line-separated sections — confirm
    # against the deployed prompt template.
    prompt = f"""{system}

Patient narrative: {user_message}

Assistant:"""

    payload = {
        "inputs": prompt,
        "parameters": {
            "max_new_tokens": 400,
            "temperature": 0.2,
            "return_full_text": False,
        },
    }

    for attempt in range(1, RETRIES_DEFAULT + 1):
        try:
            r = requests.post(
                url,
                headers=headers,
                json=payload,
                timeout=REQUEST_TIMEOUT_SECONDS_DEFAULT,
            )
        except requests.RequestException:
            # Network-level failure: linear backoff, but don't sleep after
            # the final attempt (the original slept uselessly before giving up).
            if attempt < RETRIES_DEFAULT:
                time.sleep(BACKOFF_SECONDS_DEFAULT * attempt)
            continue

        text = r.text
        try:
            data = r.json()
        except ValueError:
            # Non-JSON body: the router serves plain text while a model loads.
            if "loading" in text.lower():
                time.sleep(BACKOFF_SECONDS_DEFAULT * attempt)
                continue
            if r.status_code == 404:
                return "❌ 404 from router. Check model path: /hf-inference/text-generation/"
            return f"⚠ Non-JSON response:\n{text[:1000]}"

        gt = _extract_generated_text(data)
        if gt:
            return gt

        # BUG FIX: HF also reports "model is currently loading" as a JSON
        # error payload ({"error": "..."}); the original returned that dump
        # verbatim instead of retrying.
        err = data.get("error") if isinstance(data, dict) else None
        if isinstance(err, str) and "loading" in err.lower():
            time.sleep(BACKOFF_SECONDS_DEFAULT * attempt)
            continue

        # Valid JSON in an unrecognized shape: surface a truncated dump
        # so the user/developer can see what came back.
        return json.dumps(data)[:1500]

    return "❌ Endpoint unavailable after retries."