# Source: ai_econsult_demo / src/prompt_builder.py
# Cardiosense-AG - commit a7d55d4 (verified): "Update src/prompt_builder.py"
# File size: 5.61 kB
# src/prompt_builder.py
from typing import Any, Dict, List, Optional
# Public API of this module; the underscore-prefixed helpers below are
# intentionally not exported.
__all__ = [
"normalize_intake",
"build_referral_summary",
]
# ------------------------------- helpers ------------------------------------
def _as_str(x: Any) -> str:
    """Render *x* as a compact, human-readable string for prompt text.

    Rules, applied in order:

    * ``None`` becomes ``""``.
    * Strings are stripped of leading/trailing whitespace.
    * ``int`` / ``float`` / ``bool`` use their ``str()`` form.
    * Dicts become ``"key: value; key: value"``; entries whose rendered
      value is empty are dropped.
    * Lists and tuples become ``"a; b; c"`` in their original order; empty
      renderings are dropped.
    * Sets and frozensets are rendered the same way, but the rendered parts
      are sorted so the output does not depend on hash-iteration order.
    * Anything else falls back to ``str(x)``.

    Containers are handled recursively.
    """
    if x is None:
        return ""
    if isinstance(x, str):
        return x.strip()
    if isinstance(x, (int, float, bool)):
        return str(x)
    if isinstance(x, dict):
        pairs = []
        for key, value in x.items():
            # Render each value exactly once; rendering it twice (once to
            # filter, once to format) doubles the work at every nesting level.
            rendered = _as_str(value)
            if rendered:
                pairs.append(f"{key}: {rendered}")
        return "; ".join(pairs)
    if isinstance(x, (list, tuple, set, frozenset)):
        parts = [text for text in map(_as_str, x) if text]
        if isinstance(x, (set, frozenset)):
            # Set iteration order is not stable across runs for strings;
            # sort so identical input always yields identical prompt text.
            parts.sort()
        return "; ".join(parts)
    return str(x)
def _first_nonempty(*candidates: Any) -> Any:
    """Return the first candidate that carries a value, or ``""`` if none do.

    Falsy candidates (``None``, ``""``, empty containers, ``0``) are skipped.
    A string candidate is returned stripped, and is skipped when it contains
    only whitespace. A truthy non-string candidate (for example a list or
    dict) is returned unchanged instead of raising ``AttributeError`` on
    ``.strip()``; the caller is expected to coerce it to text afterwards.
    """
    for candidate in candidates:
        if not candidate:
            continue
        if not isinstance(candidate, str):
            # Structured values have no .strip(); hand them back as-is.
            return candidate
        stripped = candidate.strip()
        if stripped:
            return stripped
    return ""
def _strip_multiblank(s: str) -> str:
    """Collapse whitespace runs within each line and drop empty lines.

    Every line of *s* is reduced to its whitespace-separated words joined by
    single spaces; lines that end up empty are removed, and the remaining
    lines are joined with ``"\\n"``.
    """
    squeezed = (" ".join(row.split()) for row in s.splitlines())
    return "\n".join(filter(None, squeezed))
# --------------------------- normalization ----------------------------------
def normalize_intake(raw: Dict[str, Any]) -> Dict[str, Any]:
    """
    Convert an intake payload into the canonical two-section form.

    Accepted input shapes:
      - Nested: a dict that has a 'patient' and/or 'consult' key. The two
        sub-dicts are copied as they are (a missing or empty one becomes {});
        no key aliases are applied in this shape.
      - Flat: a dict with top-level keys such as 'age', 'sex', 'history',
        'question'. A few aliases are honoured ('gender', 'hpi', 'cc',
        'past_medical_history', 'social_history', 'family_history',
        'consult_question', 'reason_for_consult', 'background').

    Returns:
        {'patient': {...}, 'consult': {...}} where every value has been
        passed through _as_str, so all values are plain strings.
    """
    source = raw if raw else {}
    lookup = source.get

    if any(section in source for section in ("patient", "consult")):
        # Nested payload: shallow-copy each section so the input is not mutated.
        patient_fields = dict(lookup("patient") or {})
        consult_fields = dict(lookup("consult") or {})
    else:
        # Flat payload: map top-level keys (and their aliases) into sections.
        patient_fields = dict(
            age=lookup("age"),
            sex=lookup("sex") or lookup("gender"),
            allergies=lookup("allergies"),
            medications=lookup("medications"),
            pmh=_first_nonempty(lookup("pmh"), lookup("past_medical_history")),
            sh=_first_nonempty(lookup("sh"), lookup("social_history")),
            fh=_first_nonempty(lookup("fh"), lookup("family_history")),
            vitals=lookup("vitals"),
            labs=lookup("labs"),
            imaging=lookup("imaging"),
        )
        consult_fields = dict(
            chief_complaint=_first_nonempty(lookup("chief_complaint"), lookup("cc")),
            history=_first_nonempty(lookup("history"), lookup("hpi")),
            question=_first_nonempty(
                lookup("question"),
                lookup("consult_question"),
                lookup("reason_for_consult"),
            ),
            context=_first_nonempty(lookup("context"), lookup("background")),
            referrer=lookup("referrer"),
            priority=lookup("priority"),
        )

    # Coerce every value to a plain string so prompt construction is stable.
    return {
        "patient": {name: _as_str(value) for name, value in patient_fields.items()},
        "consult": {name: _as_str(value) for name, value in consult_fields.items()},
    }
# ------------------------------- summary ------------------------------------
def build_referral_summary(intake: Dict[str, Any], max_chars: int = 1600) -> str:
    """
    Build a compact, labelled referral summary from an intake payload.

    The intake is first passed through normalize_intake. Each section is
    emitted as one "Label: value" line, in a fixed order, and only when its
    value is non-empty. The joined text is then cleaned by _strip_multiblank.

    Args:
        intake: Intake payload in any shape normalize_intake accepts.
        max_chars: Upper bound on the returned length. When the summary is
            longer, it is cut and terminated with "..." so the result is at
            most max_chars characters; for limits below 3 there is no room
            for the ellipsis and the text is simply cut. ``0``, ``None`` or a
            negative value disables truncation.

    Returns:
        The summary text (possibly empty).

    Side effects:
        Prints one line with the final summary length to stdout.
    """
    data = normalize_intake(intake)
    patient = data["patient"]
    consult = data["consult"]

    lines: List[str] = []

    # Patient header: age and sex on a single line when either is present.
    demographics = [patient[key] for key in ("age", "sex") if patient.get(key)]
    if demographics:
        lines.append("Patient: " + ", ".join(demographics))

    # (label, section, key) in output order: the key question goes first,
    # then complaint/HPI, history/context, meds/allergies, objective data, misc.
    sections = (
        ("Key question", consult, "question"),
        ("Chief complaint", consult, "chief_complaint"),
        ("Background", consult, "history"),
        ("Past medical history", patient, "pmh"),
        ("Context", consult, "context"),
        ("Medications", patient, "medications"),
        ("Allergies", patient, "allergies"),
        ("Vitals", patient, "vitals"),
        ("Pertinent labs", patient, "labs"),
        ("Imaging", patient, "imaging"),
        ("Family history", patient, "fh"),
        ("Social history", patient, "sh"),
        ("Referrer", consult, "referrer"),
        ("Priority", consult, "priority"),
    )
    for label, section, key in sections:
        value = section.get(key)
        if value:
            lines.append(f"{label}: {value}")

    summary = _strip_multiblank("\n".join(lines))

    if max_chars is not None and max_chars > 0 and len(summary) > max_chars:
        if max_chars >= 3:
            summary = summary[: max_chars - 3].rstrip() + "..."
        else:
            # Too small for an ellipsis; a negative slice bound here would
            # count from the end and overshoot the limit, so cut plainly.
            summary = summary[:max_chars]

    # Very light console log for validation
    print(f"[prompt_builder] Summary length={len(summary)} chars")
    return summary