Update src/explainability.py

src/explainability.py (+22 -24)
@@ -1,18 +1,34 @@
 # src/explainability.py
 from __future__ import annotations
-"""Explainability helpers
+"""Explainability helpers.
 
-
-
+V3 adds simple hash-based staleness, while keeping deterministic token "chips"
+as a fallback utility (used only if the model omits a rationale).
 """
 
+import hashlib
 import math
 import re
 from typing import Dict, List
 
+# -------------------- NEW: staleness helpers --------------------
+
+def normalize_text(s: str) -> str:
+    return re.sub(r"\s+", " ", (s or "").strip())
+
+def text_hash(s: str) -> str:
+    s_norm = normalize_text(s)
+    return hashlib.sha256(s_norm.encode("utf-8")).hexdigest()[:16]
+
+def is_stale(current_text: str, baseline_hash: str | None) -> bool:
+    if not baseline_hash:
+        return False
+    return text_hash(current_text) != baseline_hash
+
+# -------------------- legacy token chips (fallback) ---------------
+
 def _tokenize(s: str) -> List[str]:
     s = s.lower()
-    # Keep simple alphanumerics
     toks = re.findall(r"[a-z0-9]+", s)
     return [t for t in toks if len(t) >= 3]
 
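Usage sketch for the new staleness helpers (assumes src is importable as a package; where the baseline hash is persisted is the caller's concern and is not part of this diff):

    from src.explainability import text_hash, is_stale

    baseline = text_hash("Chest pain, likely musculoskeletal.")

    # Whitespace-only edits normalize away, so the text is not stale.
    assert not is_stale("Chest  pain, likely musculoskeletal. ", baseline)

    # A substantive edit changes the hash, so the text is stale.
    assert is_stale("Chest pain, likely cardiac.", baseline)

    # No recorded baseline means nothing to compare against: not stale.
    assert not is_stale("anything", None)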
@@ -20,7 +36,6 @@ def segment_claims(text: str) -> List[str]:
     """Split text into claim-like sentences/lines."""
     if not text:
         return []
-    # Split by newline or period, keep moderately long segments
     raw = re.split(r"[.\n]+", text)
     claims = [c.strip() for c in raw if len(c.strip()) >= 12]
     return claims[:10]
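For context, segment_claims is unchanged apart from the dropped comment; with an illustrative input (thresholds come straight from the code above):

    from src.explainability import segment_claims

    text = "Chest pain reported. Worse on exertion. Ok.\nNo fever or chills observed."
    # Segments shorter than 12 characters ("Ok") are dropped; at most 10 claims kept.
    print(segment_claims(text))
    # -> ['Chest pain reported', 'Worse on exertion', 'No fever or chills observed']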
@@ -49,40 +64,23 @@ def chips_from_text(text: str, top_n: int = 10, min_weight: float = 0.02) -> Lis
         return []
     docs = [_tokenize(c) for c in claims]
     idf = _idf(docs)
-    # Weight tokens by TF * average claim-length proxy
     agg: Dict[str, float] = {}
     for toks in docs:
         tf = _tf(toks)
         for t, tv in tf.items():
             agg[t] = agg.get(t, 0.0) + tv * idf.get(t, 1.0)
-    # Normalize L1
     s = sum(agg.values()) or 1.0
     for k in list(agg.keys()):
         agg[k] /= s
     ranked = sorted(agg.items(), key=lambda kv: kv[1], reverse=True)
     return [{"token": tok, "weight": round(w, 4)} for tok, w in ranked if w >= min_weight][:top_n]
 
-# --- V2 helpers (post-hoc only, deterministic) ---
 def chip_cache_key(case_id: str, section: str, text: str) -> str:
     """Deterministic cache key for explainability chips."""
-    import hashlib, json
-    blob = json.dumps({"case_id": case_id, "section": section, "text": text}, sort_keys=True).encode("utf-8")
+    import json
+    blob = json.dumps({"case_id": case_id, "section": section, "text": normalize_text(text)}, sort_keys=True).encode("utf-8")
     return hashlib.sha256(blob).hexdigest()
 
-def ensure_chip_schema(chips):
-    """Force a consistent chip schema: [{token, weight}] sorted by weight desc."""
-    if not isinstance(chips, (list, tuple)):
-        return []
-    norm = []
-    for c in chips:
-        if not isinstance(c, dict):
-            continue
-        tok = str(c.get("token", "")).strip()
-        w = float(c.get("weight", 0.0))
-        if tok:
-            norm.append({"token": tok, "weight": round(w, 4)})
-    norm.sort(key=lambda x: x["weight"], reverse=True)
-    return norm
 
 
 
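A quick illustration of the behavioral change in chip_cache_key: because the text now passes through normalize_text, whitespace variants of the same note map to one cache entry (the case and section values below are made up):

    from src.explainability import chip_cache_key

    k1 = chip_cache_key("case-001", "assessment", "Chest pain,  likely benign.")
    k2 = chip_cache_key("case-001", "assessment", " Chest pain, likely benign.")
    assert k1 == k2  # normalize_text collapses the whitespace difference

    k3 = chip_cache_key("case-001", "plan", "Chest pain, likely benign.")
    assert k3 != k1  # any field change produces a different key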
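Finally, a sketch of the retained chip fallback (used only if the model omits a rationale, per the new docstring); the example output is illustrative, not actual values:

    from src.explainability import chips_from_text

    note = "Chest pain reported on exertion. Chest pain resolves at rest."
    for chip in chips_from_text(note, top_n=5):
        # Each chip is {"token": ..., "weight": ...}; weights are normalized to
        # sum to 1, thresholded at min_weight, and sorted descending, e.g.
        # {"token": "chest", "weight": 0.2} (illustrative).
        print(chip["token"], chip["weight"])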